<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<url><loc>https://scifaro.com/en/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654</loc><lastmod>2015-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/><xhtml:link rel="alternate" hreflang="zh" href="https://scifaro.com/zh/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/></url>
<url><loc>https://scifaro.com/zh/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654</loc><lastmod>2015-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/><xhtml:link rel="alternate" hreflang="zh" href="https://scifaro.com/zh/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-area-damage-detection-in-composite-structures-using-piezoelectric-transducers-0705.4654"/></url>
<url><loc>https://scifaro.com/en/abs/dsp-based-system-for-real-time-voice-synthesis-applications-development-0803.0197</loc><lastmod>2008-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsp-based-system-for-real-time-voice-synthesis-applications-development-0803.0197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsp-based-system-for-real-time-voice-synthesis-applications-development-0803.0197"/></url>
<url><loc>https://scifaro.com/en/abs/a-synthesizer-based-on-frequency-phase-analysis-and-square-waves-0804.3241</loc><lastmod>2013-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-synthesizer-based-on-frequency-phase-analysis-and-square-waves-0804.3241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-synthesizer-based-on-frequency-phase-analysis-and-square-waves-0804.3241"/></url>
<url><loc>https://scifaro.com/en/abs/a-statistical-approach-to-modeling-indian-classical-music-performance-0809.3214</loc><lastmod>2008-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-statistical-approach-to-modeling-indian-classical-music-performance-0809.3214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-statistical-approach-to-modeling-indian-classical-music-performance-0809.3214"/></url>
<url><loc>https://scifaro.com/en/abs/which-notes-are-vadi-samvadi-in-raga-rageshree-0812.0706</loc><lastmod>2008-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/which-notes-are-vadi-samvadi-in-raga-rageshree-0812.0706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/which-notes-are-vadi-samvadi-in-raga-rageshree-0812.0706"/></url>
<url><loc>https://scifaro.com/en/abs/tr01-time-continuous-sparse-imputation-0901.2416</loc><lastmod>2009-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tr01-time-continuous-sparse-imputation-0901.2416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tr01-time-continuous-sparse-imputation-0901.2416"/></url>
<url><loc>https://scifaro.com/en/abs/iklax-a-new-musical-audio-format-for-active-listening-0901.3902</loc><lastmod>2009-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iklax-a-new-musical-audio-format-for-active-listening-0901.3902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iklax-a-new-musical-audio-format-for-active-listening-0901.3902"/></url>
<url><loc>https://scifaro.com/en/abs/new-ica-beamforming-method-to-under-determined-bss-0902.2783</loc><lastmod>2010-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/new-ica-beamforming-method-to-under-determined-bss-0902.2783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/new-ica-beamforming-method-to-under-determined-bss-0902.2783"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-theory-of-time-frequency-reassignment-0903.3080</loc><lastmod>2009-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-theory-of-time-frequency-reassignment-0903.3080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-theory-of-time-frequency-reassignment-0903.3080"/></url>
<url><loc>https://scifaro.com/en/abs/tr02-state-dependent-oracle-masks-for-improved-dynamical-features-0903.3198</loc><lastmod>2009-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tr02-state-dependent-oracle-masks-for-improved-dynamical-features-0903.3198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tr02-state-dependent-oracle-masks-for-improved-dynamical-features-0903.3198"/></url>
<url><loc>https://scifaro.com/en/abs/the-modular-audio-recognition-framework-marf-and-its-applications-scientific-and-software-engineering-notes-0905.1235</loc><lastmod>2019-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-modular-audio-recognition-framework-marf-and-its-applications-scientific-and-software-engineering-notes-0905.1235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-modular-audio-recognition-framework-marf-and-its-applications-scientific-and-software-engineering-notes-0905.1235"/></url>
<url><loc>https://scifaro.com/en/abs/major-and-minor-the-formula-of-musical-emotions-0905.3678</loc><lastmod>2009-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/major-and-minor-the-formula-of-musical-emotions-0905.3678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/major-and-minor-the-formula-of-musical-emotions-0905.3678"/></url>
<url><loc>https://scifaro.com/en/abs/inter-genre-similarity-modelling-for-automatic-music-genre-classification-0907.3220</loc><lastmod>2018-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inter-genre-similarity-modelling-for-automatic-music-genre-classification-0907.3220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inter-genre-similarity-modelling-for-automatic-music-genre-classification-0907.3220"/></url>
<url><loc>https://scifaro.com/en/abs/codebook-design-method-for-noise-robust-speaker-identification-based-on-genetic-algorithm-0909.0599</loc><lastmod>2009-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codebook-design-method-for-noise-robust-speaker-identification-based-on-genetic-algorithm-0909.0599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codebook-design-method-for-noise-robust-speaker-identification-based-on-genetic-algorithm-0909.0599"/></url>
<url><loc>https://scifaro.com/en/abs/improvement-of-text-dependent-speaker-identification-system-using-neuro-genetic-hybrid-algorithm-in-office-environmental-conditions-0909.2363</loc><lastmod>2009-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvement-of-text-dependent-speaker-identification-system-using-neuro-genetic-hybrid-algorithm-in-office-environmental-conditions-0909.2363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvement-of-text-dependent-speaker-identification-system-using-neuro-genetic-hybrid-algorithm-in-office-environmental-conditions-0909.2363"/></url>
<url><loc>https://scifaro.com/en/abs/the-information-theory-of-emotions-of-musical-chords-0909.3976</loc><lastmod>2011-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-information-theory-of-emotions-of-musical-chords-0909.3976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-information-theory-of-emotions-of-musical-chords-0909.3976"/></url>
<url><loc>https://scifaro.com/en/abs/noise-speech-wavelet-analyzing-in-special-time-ranges-0911.3538</loc><lastmod>2009-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-speech-wavelet-analyzing-in-special-time-ranges-0911.3538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-speech-wavelet-analyzing-in-special-time-ranges-0911.3538"/></url>
<url><loc>https://scifaro.com/en/abs/g3-genesis-software-envrionment-update-0911.4642</loc><lastmod>2009-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/g3-genesis-software-envrionment-update-0911.4642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/g3-genesis-software-envrionment-update-0911.4642"/></url>
<url><loc>https://scifaro.com/en/abs/untangling-phase-and-time-in-monophonic-sounds-0911.5171</loc><lastmod>2011-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/untangling-phase-and-time-in-monophonic-sounds-0911.5171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/untangling-phase-and-time-in-monophonic-sounds-0911.5171"/></url>
<url><loc>https://scifaro.com/en/abs/a-digital-guitar-tuner-0912.0745</loc><lastmod>2009-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-digital-guitar-tuner-0912.0745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-digital-guitar-tuner-0912.0745"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-of-the-letter-zha-in-tamil-language-using-hmm-1001.4190</loc><lastmod>2010-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-of-the-letter-zha-in-tamil-language-using-hmm-1001.4190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-of-the-letter-zha-in-tamil-language-using-hmm-1001.4190"/></url>
<url><loc>https://scifaro.com/en/abs/up-sampling-and-natural-sample-value-computation-for-digital-pulse-width-modulators-1003.2441</loc><lastmod>2010-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/up-sampling-and-natural-sample-value-computation-for-digital-pulse-width-modulators-1003.2441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/up-sampling-and-natural-sample-value-computation-for-digital-pulse-width-modulators-1003.2441"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-analyses-of-action-related-impact-sounds-1003.4908</loc><lastmod>2010-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-analyses-of-action-related-impact-sounds-1003.4908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-analyses-of-action-related-impact-sounds-1003.4908"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-language-identification-using-hybrid-feature-extraction-methods-1003.5623</loc><lastmod>2010-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-language-identification-using-hybrid-feature-extraction-methods-1003.5623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-language-identification-using-hybrid-feature-extraction-methods-1003.5623"/></url>
<url><loc>https://scifaro.com/en/abs/wavelet-based-mel-frequency-cepstral-coefficients-for-speaker-identification-using-hidden-markov-models-1003.5627</loc><lastmod>2010-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavelet-based-mel-frequency-cepstral-coefficients-for-speaker-identification-using-hidden-markov-models-1003.5627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavelet-based-mel-frequency-cepstral-coefficients-for-speaker-identification-using-hidden-markov-models-1003.5627"/></url>
<url><loc>https://scifaro.com/en/abs/intelligent-system-for-speaker-identification-using-lip-features-with-pca-and-ica-1004.4478</loc><lastmod>2010-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intelligent-system-for-speaker-identification-using-lip-features-with-pca-and-ica-1004.4478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intelligent-system-for-speaker-identification-using-lip-features-with-pca-and-ica-1004.4478"/></url>
<url><loc>https://scifaro.com/en/abs/dichotic-harmony-for-the-musical-practice-1005.2465</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dichotic-harmony-for-the-musical-practice-1005.2465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dichotic-harmony-for-the-musical-practice-1005.2465"/></url>
<url><loc>https://scifaro.com/en/abs/improved-method-for-individualization-of-head-related-transfer-functions-on-horizontal-plane-using-reduced-number-of-anthropometric-measurements-1005.5137</loc><lastmod>2010-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-method-for-individualization-of-head-related-transfer-functions-on-horizontal-plane-using-reduced-number-of-anthropometric-measurements-1005.5137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-method-for-individualization-of-head-related-transfer-functions-on-horizontal-plane-using-reduced-number-of-anthropometric-measurements-1005.5137"/></url>
<url><loc>https://scifaro.com/en/abs/treatment-the-effects-of-studio-wall-resonance-and-coincidence-phenomena-for-recording-noisy-speech-via-fpga-digital-filter-1006.0831</loc><lastmod>2010-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/treatment-the-effects-of-studio-wall-resonance-and-coincidence-phenomena-for-recording-noisy-speech-via-fpga-digital-filter-1006.0831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/treatment-the-effects-of-studio-wall-resonance-and-coincidence-phenomena-for-recording-noisy-speech-via-fpga-digital-filter-1006.0831"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-the-interactive-hopscotch-game-for-the-children-using-computer-music-techniques-1006.0866</loc><lastmod>2010-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-the-interactive-hopscotch-game-for-the-children-using-computer-music-techniques-1006.0866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-the-interactive-hopscotch-game-for-the-children-using-computer-music-techniques-1006.0866"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-infants-cry-fundamental-frequency-using-a-modified-sift-algorithm-1009.2796</loc><lastmod>2010-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-infants-cry-fundamental-frequency-using-a-modified-sift-algorithm-1009.2796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-infants-cry-fundamental-frequency-using-a-modified-sift-algorithm-1009.2796"/></url>
<url><loc>https://scifaro.com/en/abs/a-fast-audio-clustering-using-vector-quantization-and-second-order-statistics-1009.4719</loc><lastmod>2010-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fast-audio-clustering-using-vector-quantization-and-second-order-statistics-1009.4719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fast-audio-clustering-using-vector-quantization-and-second-order-statistics-1009.4719"/></url>
<url><loc>https://scifaro.com/en/abs/approximate-maximum-a-posteriori-inference-with-entropic-priors-1009.5761</loc><lastmod>2010-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approximate-maximum-a-posteriori-inference-with-entropic-priors-1009.5761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approximate-maximum-a-posteriori-inference-with-entropic-priors-1009.5761"/></url>
<url><loc>https://scifaro.com/en/abs/should-corpora-be-big-rich-or-dense-1012.2797</loc><lastmod>2010-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/should-corpora-be-big-rich-or-dense-1012.2797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/should-corpora-be-big-rich-or-dense-1012.2797"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-gross-alignment-errors-in-the-spoken-british-national-corpus-1101.1682</loc><lastmod>2011-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-gross-alignment-errors-in-the-spoken-british-national-corpus-1101.1682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-gross-alignment-errors-in-the-spoken-british-national-corpus-1101.1682"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-rate-aware-noise-generation-1103.4118</loc><lastmod>2011-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-rate-aware-noise-generation-1103.4118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-rate-aware-noise-generation-1103.4118"/></url>
<url><loc>https://scifaro.com/en/abs/an-automatic-volume-control-for-preserving-intelligibility-1104.3544</loc><lastmod>2016-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automatic-volume-control-for-preserving-intelligibility-1104.3544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automatic-volume-control-for-preserving-intelligibility-1104.3544"/></url>
<url><loc>https://scifaro.com/en/abs/topological-considerations-for-tuning-and-fingering-stringed-instruments-1105.1383</loc><lastmod>2011-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topological-considerations-for-tuning-and-fingering-stringed-instruments-1105.1383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topological-considerations-for-tuning-and-fingering-stringed-instruments-1105.1383"/></url>
<url><loc>https://scifaro.com/en/abs/improving-performance-of-speaker-identification-system-using-complementary-information-fusion-1105.2770</loc><lastmod>2015-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-performance-of-speaker-identification-system-using-complementary-information-fusion-1105.2770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-performance-of-speaker-identification-system-using-complementary-information-fusion-1105.2770"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-the-electroweak-phase-transition-sonification-of-bubble-nucleation-1106.0760</loc><lastmod>2015-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-the-electroweak-phase-transition-sonification-of-bubble-nucleation-1106.0760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-the-electroweak-phase-transition-sonification-of-bubble-nucleation-1106.0760"/></url>
<url><loc>https://scifaro.com/en/abs/a-fast-affine-projection-algorithm-based-on-matching-pursuit-in-adaptive-noise-cancellation-for-speech-enhancement-1106.0844</loc><lastmod>2011-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fast-affine-projection-algorithm-based-on-matching-pursuit-in-adaptive-noise-cancellation-for-speech-enhancement-1106.0844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fast-affine-projection-algorithm-based-on-matching-pursuit-in-adaptive-noise-cancellation-for-speech-enhancement-1106.0844"/></url>
<url><loc>https://scifaro.com/en/abs/a-family-of-adaptive-filter-algorithms-in-noise-cancellation-for-speech-enhancement-1106.0846</loc><lastmod>2011-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-family-of-adaptive-filter-algorithms-in-noise-cancellation-for-speech-enhancement-1106.0846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-family-of-adaptive-filter-algorithms-in-noise-cancellation-for-speech-enhancement-1106.0846"/></url>
<url><loc>https://scifaro.com/en/abs/open-loop-multi-channel-inversion-of-room-impulse-response-1106.1199</loc><lastmod>2011-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-loop-multi-channel-inversion-of-room-impulse-response-1106.1199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-loop-multi-channel-inversion-of-room-impulse-response-1106.1199"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-severity-of-speech-disability-through-speech-envelope-1107.4185</loc><lastmod>2011-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-severity-of-speech-disability-through-speech-envelope-1107.4185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-severity-of-speech-disability-through-speech-envelope-1107.4185"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-machine-learning-system-for-harmonic-analysis-of-music-1107.4969</loc><lastmod>2011-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-machine-learning-system-for-harmonic-analysis-of-music-1107.4969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-machine-learning-system-for-harmonic-analysis-of-music-1107.4969"/></url>
<url><loc>https://scifaro.com/en/abs/application-of-gammachirp-auditory-filter-as-a-continuous-wavelet-analysis-1107.5492</loc><lastmod>2011-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/application-of-gammachirp-auditory-filter-as-a-continuous-wavelet-analysis-1107.5492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/application-of-gammachirp-auditory-filter-as-a-continuous-wavelet-analysis-1107.5492"/></url>
<url><loc>https://scifaro.com/en/abs/r-enyi-information-measures-for-spectral-change-detection-1109.5876</loc><lastmod>2011-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/r-enyi-information-measures-for-spectral-change-detection-1109.5876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/r-enyi-information-measures-for-spectral-change-detection-1109.5876"/></url>
<url><loc>https://scifaro.com/en/abs/fractal-string-generation-and-its-application-in-music-composition-1109.6270</loc><lastmod>2011-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fractal-string-generation-and-its-application-in-music-composition-1109.6270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fractal-string-generation-and-its-application-in-music-composition-1109.6270"/></url>
<url><loc>https://scifaro.com/en/abs/a-reduced-multiple-gabor-frame-for-local-time-adaptation-of-the-spectrogram-1109.6313</loc><lastmod>2011-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-reduced-multiple-gabor-frame-for-local-time-adaptation-of-the-spectrogram-1109.6313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-reduced-multiple-gabor-frame-for-local-time-adaptation-of-the-spectrogram-1109.6313"/></url>
<url><loc>https://scifaro.com/en/abs/an-entropy-based-method-for-local-time-adaptation-of-the-spectrogram-1109.6314</loc><lastmod>2011-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-entropy-based-method-for-local-time-adaptation-of-the-spectrogram-1109.6314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-entropy-based-method-for-local-time-adaptation-of-the-spectrogram-1109.6314"/></url>
<url><loc>https://scifaro.com/en/abs/sound-analysis-and-synthesis-adaptive-in-time-and-two-frequency-bands-1109.6651</loc><lastmod>2011-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-analysis-and-synthesis-adaptive-in-time-and-two-frequency-bands-1109.6651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-analysis-and-synthesis-adaptive-in-time-and-two-frequency-bands-1109.6651"/></url>
<url><loc>https://scifaro.com/en/abs/text-independent-speaker-recognition-for-low-snr-environments-with-encryption-1111.0024</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-independent-speaker-recognition-for-low-snr-environments-with-encryption-1111.0024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-independent-speaker-recognition-for-low-snr-environments-with-encryption-1111.0024"/></url>
<url><loc>https://scifaro.com/en/abs/discovering-novel-computer-music-techniques-by-exploring-the-space-of-short-computer-programs-1112.1368</loc><lastmod>2015-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discovering-novel-computer-music-techniques-by-exploring-the-space-of-short-computer-programs-1112.1368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discovering-novel-computer-music-techniques-by-exploring-the-space-of-short-computer-programs-1112.1368"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-framework-for-online-audio-source-separation-1112.6178</loc><lastmod>2011-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-framework-for-online-audio-source-separation-1112.6178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-framework-for-online-audio-source-separation-1112.6178"/></url>
<url><loc>https://scifaro.com/en/abs/harmony-explained-progress-towards-a-scientific-theory-of-music-1202.4212</loc><lastmod>2014-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmony-explained-progress-towards-a-scientific-theory-of-music-1202.4212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmony-explained-progress-towards-a-scientific-theory-of-music-1202.4212"/></url>
<url><loc>https://scifaro.com/en/abs/employing-subsequence-matching-in-audio-data-processing-1204.2541</loc><lastmod>2012-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-subsequence-matching-in-audio-data-processing-1204.2541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-subsequence-matching-in-audio-data-processing-1204.2541"/></url>
<url><loc>https://scifaro.com/en/abs/using-mimicry-to-learn-about-mental-representations-1204.3236</loc><lastmod>2012-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-mimicry-to-learn-about-mental-representations-1204.3236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-mimicry-to-learn-about-mental-representations-1204.3236"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-the-evolution-of-contemporary-western-popular-music-1205.5651</loc><lastmod>2016-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-the-evolution-of-contemporary-western-popular-music-1205.5651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-the-evolution-of-contemporary-western-popular-music-1205.5651"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-performance-of-fpga-based-mel-filter-bank-bark-filter-bank-1206.1450</loc><lastmod>2012-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-performance-of-fpga-based-mel-filter-bank-bark-filter-bank-1206.1450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-performance-of-fpga-based-mel-filter-bank-bark-filter-bank-1206.1450"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-speech-under-stress-using-linear-techniques-and-non-linear-techniques-for-emotion-recognition-system-1207.5104</loc><lastmod>2012-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-speech-under-stress-using-linear-techniques-and-non-linear-techniques-for-emotion-recognition-system-1207.5104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-speech-under-stress-using-linear-techniques-and-non-linear-techniques-for-emotion-recognition-system-1207.5104"/></url>
<url><loc>https://scifaro.com/en/abs/evolving-musical-counterpoint-the-chronopoint-musical-evolution-system-1207.5560</loc><lastmod>2012-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolving-musical-counterpoint-the-chronopoint-musical-evolution-system-1207.5560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolving-musical-counterpoint-the-chronopoint-musical-evolution-system-1207.5560"/></url>
<url><loc>https://scifaro.com/en/abs/algorithm-to-suppress-scanner-noise-in-recorded-speech-during-functional-magnetic-resonance-imaging-1207.5827</loc><lastmod>2012-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/algorithm-to-suppress-scanner-noise-in-recorded-speech-during-functional-magnetic-resonance-imaging-1207.5827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/algorithm-to-suppress-scanner-noise-in-recorded-speech-during-functional-magnetic-resonance-imaging-1207.5827"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-a-modern-voice-morphing-approach-using-gaussian-mixture-models-for-laryngectomees-1208.1418</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-a-modern-voice-morphing-approach-using-gaussian-mixture-models-for-laryngectomees-1208.1418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-a-modern-voice-morphing-approach-using-gaussian-mixture-models-for-laryngectomees-1208.1418"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-method-for-obtaining-a-better-quality-speech-signal-for-cochlear-implants-using-kalman-with-drnl-and-ssb-technique-1210.0171</loc><lastmod>2012-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-method-for-obtaining-a-better-quality-speech-signal-for-cochlear-implants-using-kalman-with-drnl-and-ssb-technique-1210.0171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-method-for-obtaining-a-better-quality-speech-signal-for-cochlear-implants-using-kalman-with-drnl-and-ssb-technique-1210.0171"/></url>
<url><loc>https://scifaro.com/en/abs/blind-speech-separation-based-on-undecimated-wavelet-packet-perceptual-filterbanks-and-independent-component-analysis-1210.3778</loc><lastmod>2012-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-speech-separation-based-on-undecimated-wavelet-packet-perceptual-filterbanks-and-independent-component-analysis-1210.3778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-speech-separation-based-on-undecimated-wavelet-packet-perceptual-filterbanks-and-independent-component-analysis-1210.3778"/></url>
<url><loc>https://scifaro.com/en/abs/semi-blind-source-separation-via-sparse-representations-and-online-dictionary-learning-1212.0451</loc><lastmod>2015-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-blind-source-separation-via-sparse-representations-and-online-dictionary-learning-1212.0451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-blind-source-separation-via-sparse-representations-and-online-dictionary-learning-1212.0451"/></url>
<url><loc>https://scifaro.com/en/abs/a-nuclear-norm-based-convex-formulation-for-informed-source-separation-1212.3119</loc><lastmod>2012-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-nuclear-norm-based-convex-formulation-for-informed-source-separation-1212.3119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-nuclear-norm-based-convex-formulation-for-informed-source-separation-1212.3119"/></url>
<url><loc>https://scifaro.com/en/abs/single-sided-real-time-pesq-score-estimation-1212.6350</loc><lastmod>2013-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-sided-real-time-pesq-score-estimation-1212.6350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-sided-real-time-pesq-score-estimation-1212.6350"/></url>
<url><loc>https://scifaro.com/en/abs/about-multichannel-speech-signal-extraction-and-separation-techniques-1212.6903</loc><lastmod>2013-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/about-multichannel-speech-signal-extraction-and-separation-techniques-1212.6903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/about-multichannel-speech-signal-extraction-and-separation-techniques-1212.6903"/></url>
<url><loc>https://scifaro.com/en/abs/usable-speech-assignment-for-speaker-identification-under-co-channel-situation-1301.0265</loc><lastmod>2013-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usable-speech-assignment-for-speaker-identification-under-co-channel-situation-1301.0265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usable-speech-assignment-for-speaker-identification-under-co-channel-situation-1301.0265"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-a-multi-resolution-dyadic-wavelet-transform-method-for-usable-speech-detection-1301.0278</loc><lastmod>2013-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-a-multi-resolution-dyadic-wavelet-transform-method-for-usable-speech-detection-1301.0278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-a-multi-resolution-dyadic-wavelet-transform-method-for-usable-speech-detection-1301.0278"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-for-classification-of-dysfluent-and-fluent-speech-using-k-nn-and-svm-1301.1932</loc><lastmod>2013-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-for-classification-of-dysfluent-and-fluent-speech-using-k-nn-and-svm-1301.1932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-for-classification-of-dysfluent-and-fluent-speech-using-k-nn-and-svm-1301.1932"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-a-posteriori-estimation-of-piecewise-arcs-in-tempo-time-series-1302.0136</loc><lastmod>2013-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-a-posteriori-estimation-of-piecewise-arcs-in-tempo-time-series-1302.0136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-a-posteriori-estimation-of-piecewise-arcs-in-tempo-time-series-1302.0136"/></url>
<url><loc>https://scifaro.com/en/abs/improved-multiple-birdsong-tracking-with-distribution-derivative-method-and-markov-renewal-process-clustering-1302.3462</loc><lastmod>2014-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-multiple-birdsong-tracking-with-distribution-derivative-method-and-markov-renewal-process-clustering-1302.3462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-multiple-birdsong-tracking-with-distribution-derivative-method-and-markov-renewal-process-clustering-1302.3462"/></url>
<url><loc>https://scifaro.com/en/abs/finite-element-computation-of-elliptical-vocal-tract-impedances-using-the-two-microphone-transfer-function-method-1302.4382</loc><lastmod>2017-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/finite-element-computation-of-elliptical-vocal-tract-impedances-using-the-two-microphone-transfer-function-method-1302.4382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/finite-element-computation-of-elliptical-vocal-tract-impedances-using-the-two-microphone-transfer-function-method-1302.4382"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-i-1302.6031</loc><lastmod>2013-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-i-1302.6031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-i-1302.6031"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-ii-1302.6194</loc><lastmod>2013-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-ii-1302.6194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-discrimination-using-ks-algebra-ii-1302.6194"/></url>
<url><loc>https://scifaro.com/en/abs/sound-localization-using-compressive-sensing-1302.7070</loc><lastmod>2013-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-localization-using-compressive-sensing-1302.7070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-localization-using-compressive-sensing-1302.7070"/></url>
<url><loc>https://scifaro.com/en/abs/consistent-iterative-hard-thresholding-for-signal-declipping-1303.1023</loc><lastmod>2013-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistent-iterative-hard-thresholding-for-signal-declipping-1303.1023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistent-iterative-hard-thresholding-for-signal-declipping-1303.1023"/></url>
<url><loc>https://scifaro.com/en/abs/toward-evolution-strategies-application-in-automatic-polyphonic-music-transcription-using-electronic-synthesis-1304.0969</loc><lastmod>2013-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-evolution-strategies-application-in-automatic-polyphonic-music-transcription-using-electronic-synthesis-1304.0969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-evolution-strategies-application-in-automatic-polyphonic-music-transcription-using-electronic-synthesis-1304.0969"/></url>
<url><loc>https://scifaro.com/en/abs/deep-scattering-spectrum-1304.6763</loc><lastmod>2015-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-scattering-spectrum-1304.6763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-scattering-spectrum-1304.6763"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-echo-cancellation-postfilter-design-issues-for-speech-recognition-system-1305.1141</loc><lastmod>2013-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-postfilter-design-issues-for-speech-recognition-system-1305.1141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-postfilter-design-issues-for-speech-recognition-system-1305.1141"/></url>
<url><loc>https://scifaro.com/en/abs/techniques-for-feature-extraction-in-speech-recognition-system-a-comparative-study-1305.1145</loc><lastmod>2013-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/techniques-for-feature-extraction-in-speech-recognition-system-a-comparative-study-1305.1145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/techniques-for-feature-extraction-in-speech-recognition-system-a-comparative-study-1305.1145"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-modeling-towards-robust-speech-recognition-system-1305.1426</loc><lastmod>2013-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-modeling-towards-robust-speech-recognition-system-1305.1426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-modeling-towards-robust-speech-recognition-system-1305.1426"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-pitch-detection-approach-for-noisy-environment-1305.2352</loc><lastmod>2013-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-pitch-detection-approach-for-noisy-environment-1305.2352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-pitch-detection-approach-for-noisy-environment-1305.2352"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speech-recognition-using-template-model-for-man-machine-interface-1305.2959</loc><lastmod>2013-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speech-recognition-using-template-model-for-man-machine-interface-1305.2959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speech-recognition-using-template-model-for-man-machine-interface-1305.2959"/></url>
<url><loc>https://scifaro.com/en/abs/the-gtzan-dataset-its-contents-its-faults-their-effects-on-evaluation-and-its-future-use-1306.1461</loc><lastmod>2015-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-gtzan-dataset-its-contents-its-faults-their-effects-on-evaluation-and-its-future-use-1306.1461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-gtzan-dataset-its-contents-its-faults-their-effects-on-evaluation-and-its-future-use-1306.1461"/></url>
<url><loc>https://scifaro.com/en/abs/a-perceptual-alphabet-for-the-10-dimensional-phonetic-prosodic-space-1306.2593</loc><lastmod>2020-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-perceptual-alphabet-for-the-10-dimensional-phonetic-prosodic-space-1306.2593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-perceptual-alphabet-for-the-10-dimensional-phonetic-prosodic-space-1306.2593"/></url>
<url><loc>https://scifaro.com/en/abs/harmony-perception-by-periodicity-detection-1306.6458</loc><lastmod>2018-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmony-perception-by-periodicity-detection-1306.6458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmony-perception-by-periodicity-detection-1306.6458"/></url>
<url><loc>https://scifaro.com/en/abs/an-open-dataset-for-research-on-audio-field-recording-archives-freefield1010-1309.5275</loc><lastmod>2013-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-open-dataset-for-research-on-audio-field-recording-archives-freefield1010-1309.5275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-open-dataset-for-research-on-audio-field-recording-archives-freefield1010-1309.5275"/></url>
<url><loc>https://scifaro.com/en/abs/non-negative-matrix-factorization-with-linear-constraints-for-single-channel-speech-enhancement-1309.6047</loc><lastmod>2013-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-with-linear-constraints-for-single-channel-speech-enhancement-1309.6047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-with-linear-constraints-for-single-channel-speech-enhancement-1309.6047"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-discrimination-using-neurons-with-symmetric-nonlinear-response-over-a-spectral-range-1311.0819</loc><lastmod>2013-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-discrimination-using-neurons-with-symmetric-nonlinear-response-over-a-spectral-range-1311.0819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-discrimination-using-neurons-with-symmetric-nonlinear-response-over-a-spectral-range-1311.0819"/></url>
<url><loc>https://scifaro.com/en/abs/an-intuitive-design-approach-for-implementing-real-time-audio-effects-1311.0842</loc><lastmod>2013-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-intuitive-design-approach-for-implementing-real-time-audio-effects-1311.0842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-intuitive-design-approach-for-implementing-real-time-audio-effects-1311.0842"/></url>
<url><loc>https://scifaro.com/en/abs/a-geometric-approach-to-sound-source-localization-from-time-delay-estimates-1311.1047</loc><lastmod>2014-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-geometric-approach-to-sound-source-localization-from-time-delay-estimates-1311.1047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-geometric-approach-to-sound-source-localization-from-time-delay-estimates-1311.1047"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-analysis-of-frequency-modulation-in-birdsong-databases-1311.4764</loc><lastmod>2015-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-analysis-of-frequency-modulation-in-birdsong-databases-1311.4764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-analysis-of-frequency-modulation-in-birdsong-databases-1311.4764"/></url>
<url><loc>https://scifaro.com/en/abs/objets-sonores-une-repr-esentation-bio-inspir-ee-hi-erarchique-parcimonieuse-a-tr-es-grandes-dimensions-utilisable-en-reconnaissance-auditory-objects-bio-inspired-hierarchical-sparse-high-dimensional-representation-for-recognition-1311.5924</loc><lastmod>2013-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objets-sonores-une-repr-esentation-bio-inspir-ee-hi-erarchique-parcimonieuse-a-tr-es-grandes-dimensions-utilisable-en-reconnaissance-auditory-objects-bio-inspired-hierarchical-sparse-high-dimensional-representation-for-recognition-1311.5924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objets-sonores-une-repr-esentation-bio-inspir-ee-hi-erarchique-parcimonieuse-a-tr-es-grandes-dimensions-utilisable-en-reconnaissance-auditory-objects-bio-inspired-hierarchical-sparse-high-dimensional-representation-for-recognition-1311.5924"/></url>
<url><loc>https://scifaro.com/en/abs/reverberant-audio-source-separation-via-sparse-and-low-rank-modeling-1312.2795</loc><lastmod>2015-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberant-audio-source-separation-via-sparse-and-low-rank-modeling-1312.2795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberant-audio-source-separation-via-sparse-and-low-rank-modeling-1312.2795"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-method-to-produce-algorithmic-midi-music-based-on-randomness-simple-probabilities-and-multi-threading-1312.4014</loc><lastmod>2013-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-method-to-produce-algorithmic-midi-music-based-on-randomness-simple-probabilities-and-multi-threading-1312.4014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-method-to-produce-algorithmic-midi-music-based-on-randomness-simple-probabilities-and-multi-threading-1312.4014"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-approach-for-co-channel-speech-segregation-based-on-casa-hmm-multipitch-tracking-and-medium-frame-harmonic-model-1312.4127</loc><lastmod>2013-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-approach-for-co-channel-speech-segregation-based-on-casa-hmm-multipitch-tracking-and-medium-frame-harmonic-model-1312.4127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-approach-for-co-channel-speech-segregation-based-on-casa-hmm-multipitch-tracking-and-medium-frame-harmonic-model-1312.4127"/></url>
<url><loc>https://scifaro.com/en/abs/tdoa-based-localization-in-two-dimensions-the-bifurcation-curve-1402.1530</loc><lastmod>2016-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tdoa-based-localization-in-two-dimensions-the-bifurcation-curve-1402.1530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tdoa-based-localization-in-two-dimensions-the-bifurcation-curve-1402.1530"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-space-learning-for-sound-source-separation-and-localization-on-binaural-manifolds-1402.2683</loc><lastmod>2015-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-space-learning-for-sound-source-separation-and-localization-on-binaural-manifolds-1402.2683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-space-learning-for-sound-source-separation-and-localization-on-binaural-manifolds-1402.2683"/></url>
<url><loc>https://scifaro.com/en/abs/sound-representation-and-classification-benchmark-for-domestic-robots-1402.3689</loc><lastmod>2014-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-representation-and-classification-benchmark-for-domestic-robots-1402.3689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-representation-and-classification-benchmark-for-domestic-robots-1402.3689"/></url>
<url><loc>https://scifaro.com/en/abs/maximizing-the-signal-to-alias-ratio-in-non-uniform-filter-banks-for-acoustic-echo-cancellation-1402.4160</loc><lastmod>2014-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximizing-the-signal-to-alias-ratio-in-non-uniform-filter-banks-for-acoustic-echo-cancellation-1402.4160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximizing-the-signal-to-alias-ratio-in-non-uniform-filter-banks-for-acoustic-echo-cancellation-1402.4160"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-doa-estimation-of-wideband-sound-sources-using-circular-harmonics-1403.1501</loc><lastmod>2014-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-doa-estimation-of-wideband-sound-sources-using-circular-harmonics-1403.1501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-doa-estimation-of-wideband-sound-sources-using-circular-harmonics-1403.1501"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-window-and-lattice-in-gabor-transform-application-to-audio-analysis-1403.2180</loc><lastmod>2014-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-window-and-lattice-in-gabor-transform-application-to-audio-analysis-1403.2180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-window-and-lattice-in-gabor-transform-application-to-audio-analysis-1403.2180"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-segmentation-of-broadcast-news-audio-using-self-similarity-matrix-1403.6901</loc><lastmod>2014-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-segmentation-of-broadcast-news-audio-using-self-similarity-matrix-1403.6901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-segmentation-of-broadcast-news-audio-using-self-similarity-matrix-1403.6901"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-representation-for-invariance-and-music-classification-1404.0400</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-representation-for-invariance-and-music-classification-1404.0400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-representation-for-invariance-and-music-classification-1404.0400"/></url>
<url><loc>https://scifaro.com/en/abs/high-throughput-and-less-area-amp-architecture-for-audio-signal-restoration-1404.1468</loc><lastmod>2014-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-throughput-and-less-area-amp-architecture-for-audio-signal-restoration-1404.1468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-throughput-and-less-area-amp-architecture-for-audio-signal-restoration-1404.1468"/></url>
<url><loc>https://scifaro.com/en/abs/idealized-computational-models-for-auditory-receptive-fields-1404.2037</loc><lastmod>2015-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/idealized-computational-models-for-auditory-receptive-fields-1404.2037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/idealized-computational-models-for-auditory-receptive-fields-1404.2037"/></url>
<url><loc>https://scifaro.com/en/abs/improving-blind-source-separation-performance-by-adaptive-array-geometries-for-humanoid-robots-1404.6881</loc><lastmod>2014-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-blind-source-separation-performance-by-adaptive-array-geometries-for-humanoid-robots-1404.6881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-blind-source-separation-performance-by-adaptive-array-geometries-for-humanoid-robots-1404.6881"/></url>
<url><loc>https://scifaro.com/en/abs/design-and-optimization-of-a-speech-recognition-front-end-for-distant-talking-control-of-a-music-playback-device-1405.1379</loc><lastmod>2014-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-and-optimization-of-a-speech-recognition-front-end-for-distant-talking-control-of-a-music-playback-device-1405.1379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-and-optimization-of-a-speech-recognition-front-end-for-distant-talking-control-of-a-music-playback-device-1405.1379"/></url>
<url><loc>https://scifaro.com/en/abs/trends-and-perspectives-for-signal-processing-in-consumer-audio-1405.4843</loc><lastmod>2014-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trends-and-perspectives-for-signal-processing-in-consumer-audio-1405.4843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trends-and-perspectives-for-signal-processing-in-consumer-audio-1405.4843"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-large-scale-classification-of-bird-sounds-is-strongly-improved-by-unsupervised-feature-learning-1405.6524</loc><lastmod>2014-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-large-scale-classification-of-bird-sounds-is-strongly-improved-by-unsupervised-feature-learning-1405.6524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-large-scale-classification-of-bird-sounds-is-strongly-improved-by-unsupervised-feature-learning-1405.6524"/></url>
<url><loc>https://scifaro.com/en/abs/sparsity-aware-filtered-x-affine-projection-algorithms-for-active-noise-control-1405.6945</loc><lastmod>2014-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsity-aware-filtered-x-affine-projection-algorithms-for-active-noise-control-1405.6945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsity-aware-filtered-x-affine-projection-algorithms-for-active-noise-control-1405.6945"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-signal-digital-processing-instrument-for-analog-to-digital-conversion-study-1405.7866</loc><lastmod>2014-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-signal-digital-processing-instrument-for-analog-to-digital-conversion-study-1405.7866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-signal-digital-processing-instrument-for-analog-to-digital-conversion-study-1405.7866"/></url>
<url><loc>https://scifaro.com/en/abs/music-and-vocal-separation-using-multi-band-modulation-based-features-1406.2464</loc><lastmod>2014-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-and-vocal-separation-using-multi-band-modulation-based-features-1406.2464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-and-vocal-separation-using-multi-band-modulation-based-features-1406.2464"/></url>
<url><loc>https://scifaro.com/en/abs/learning-an-invariant-speech-representation-1406.3884</loc><lastmod>2014-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-an-invariant-speech-representation-1406.3884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-an-invariant-speech-representation-1406.3884"/></url>
<url><loc>https://scifaro.com/en/abs/a-bengali-hmm-based-speech-synthesis-system-1406.3915</loc><lastmod>2014-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bengali-hmm-based-speech-synthesis-system-1406.3915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bengali-hmm-based-speech-synthesis-system-1406.3915"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-fado-music-classification-1406.4447</loc><lastmod>2014-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-fado-music-classification-1406.4447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-fado-music-classification-1406.4447"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-level-data-fusion-approach-for-speaker-identification-on-telephone-speech-1407.0380</loc><lastmod>2014-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-level-data-fusion-approach-for-speaker-identification-on-telephone-speech-1407.0380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-level-data-fusion-approach-for-speaker-identification-on-telephone-speech-1407.0380"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-steered-response-power-methods-for-sound-source-localization-using-microphone-arrays-1407.2351</loc><lastmod>2015-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-steered-response-power-methods-for-sound-source-localization-using-microphone-arrays-1407.2351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-steered-response-power-methods-for-sound-source-localization-using-microphone-arrays-1407.2351"/></url>
<url><loc>https://scifaro.com/en/abs/speech-polarity-detection-using-hilbert-phase-information-1407.3398</loc><lastmod>2014-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-polarity-detection-using-hilbert-phase-information-1407.3398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-polarity-detection-using-hilbert-phase-information-1407.3398"/></url>
<url><loc>https://scifaro.com/en/abs/raking-the-cocktail-party-1407.5514</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raking-the-cocktail-party-1407.5514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raking-the-cocktail-party-1407.5514"/></url>
<url><loc>https://scifaro.com/en/abs/co-localization-of-audio-sources-in-images-using-binaural-features-and-locally-linear-regression-1408.2700</loc><lastmod>2016-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/co-localization-of-audio-sources-in-images-using-binaural-features-and-locally-linear-regression-1408.2700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/co-localization-of-audio-sources-in-images-using-binaural-features-and-locally-linear-regression-1408.2700"/></url>
<url><loc>https://scifaro.com/en/abs/computerized-multi-microphone-test-system-1409.0117</loc><lastmod>2014-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computerized-multi-microphone-test-system-1409.0117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computerized-multi-microphone-test-system-1409.0117"/></url>
<url><loc>https://scifaro.com/en/abs/ad-hoc-microphone-array-calibration-euclidean-distance-matrix-completion-algorithm-and-theoretical-guarantees-1409.0203</loc><lastmod>2014-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ad-hoc-microphone-array-calibration-euclidean-distance-matrix-completion-algorithm-and-theoretical-guarantees-1409.0203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ad-hoc-microphone-array-calibration-euclidean-distance-matrix-completion-algorithm-and-theoretical-guarantees-1409.0203"/></url>
<url><loc>https://scifaro.com/en/abs/dsp-ear-leveraging-co-processor-support-for-continuous-audio-sensing-on-smartphones-1409.3206</loc><lastmod>2014-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsp-ear-leveraging-co-processor-support-for-continuous-audio-sensing-on-smartphones-1409.3206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsp-ear-leveraging-co-processor-support-for-continuous-audio-sensing-on-smartphones-1409.3206"/></url>
<url><loc>https://scifaro.com/en/abs/a-single-processor-approach-to-speech-processing-pipeline-of-bilateral-cochlear-implants-1409.6554</loc><lastmod>2014-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-single-processor-approach-to-speech-processing-pipeline-of-bilateral-cochlear-implants-1409.6554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-single-processor-approach-to-speech-processing-pipeline-of-bilateral-cochlear-implants-1409.6554"/></url>
<url><loc>https://scifaro.com/en/abs/audio-surveillance-a-systematic-review-1409.7787</loc><lastmod>2014-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-surveillance-a-systematic-review-1409.7787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-surveillance-a-systematic-review-1409.7787"/></url>
<url><loc>https://scifaro.com/en/abs/phase-optimized-k-svd-for-signal-extraction-from-underdetermined-multichannel-sparse-mixtures-1410.2430</loc><lastmod>2014-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-optimized-k-svd-for-signal-extraction-from-underdetermined-multichannel-sparse-mixtures-1410.2430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-optimized-k-svd-for-signal-extraction-from-underdetermined-multichannel-sparse-mixtures-1410.2430"/></url>
<url><loc>https://scifaro.com/en/abs/choice-of-mel-filter-bank-in-computing-mfcc-of-a-resampled-speech-1410.6903</loc><lastmod>2014-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/choice-of-mel-filter-bank-in-computing-mfcc-of-a-resampled-speech-1410.6903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/choice-of-mel-filter-bank-in-computing-mfcc-of-a-resampled-speech-1410.6903"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-stress-information-in-speech-for-speaker-recognition-1410.6905</loc><lastmod>2014-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-stress-information-in-speech-for-speaker-recognition-1410.6905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-stress-information-in-speech-for-speaker-recognition-1410.6905"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-transitions-between-broad-phonetic-classes-in-a-speech-signal-1411.0370</loc><lastmod>2014-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-transitions-between-broad-phonetic-classes-in-a-speech-signal-1411.0370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-transitions-between-broad-phonetic-classes-in-a-speech-signal-1411.0370"/></url>
<url><loc>https://scifaro.com/en/abs/an-interesting-property-of-lpcs-for-sonorant-vs-fricative-discrimination-1411.1267</loc><lastmod>2014-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-interesting-property-of-lpcs-for-sonorant-vs-fricative-discrimination-1411.1267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-interesting-property-of-lpcs-for-sonorant-vs-fricative-discrimination-1411.1267"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-uncertainty-parameter-sr-signal-to-residual-spectrum-ratio-evaluation-approach-for-speech-enhancement-1411.1898</loc><lastmod>2014-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-uncertainty-parameter-sr-signal-to-residual-spectrum-ratio-evaluation-approach-for-speech-enhancement-1411.1898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-uncertainty-parameter-sr-signal-to-residual-spectrum-ratio-evaluation-approach-for-speech-enhancement-1411.1898"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-source-subtraction-based-on-incomplete-measurements-of-relative-transfer-function-1411.2744</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-source-subtraction-based-on-incomplete-measurements-of-relative-transfer-function-1411.2744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-source-subtraction-based-on-incomplete-measurements-of-relative-transfer-function-1411.2744"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-from-youtube-obtained-data-1411.2795</loc><lastmod>2014-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-from-youtube-obtained-data-1411.2795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-from-youtube-obtained-data-1411.2795"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-1411.3715</loc><lastmod>2015-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-1411.3715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-1411.3715"/></url>
<url><loc>https://scifaro.com/en/abs/which-are-you-in-a-photo-1411.4890</loc><lastmod>2014-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/which-are-you-in-a-photo-1411.4890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/which-are-you-in-a-photo-1411.4890"/></url>
<url><loc>https://scifaro.com/en/abs/a-complex-matrix-factorization-approach-to-joint-modeling-of-magnitude-and-phase-for-source-separation-1411.6741</loc><lastmod>2014-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-complex-matrix-factorization-approach-to-joint-modeling-of-magnitude-and-phase-for-source-separation-1411.6741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-complex-matrix-factorization-approach-to-joint-modeling-of-magnitude-and-phase-for-source-separation-1411.6741"/></url>
<url><loc>https://scifaro.com/en/abs/the-bag-of-frames-approach-a-not-so-sufficient-model-for-urban-soundscapes-1412.4052</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-bag-of-frames-approach-a-not-so-sufficient-model-for-urban-soundscapes-1412.4052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-bag-of-frames-approach-a-not-so-sufficient-model-for-urban-soundscapes-1412.4052"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-multi-embeddings-learning-of-acoustic-models-1412.6645</loc><lastmod>2015-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-multi-embeddings-learning-of-acoustic-models-1412.6645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-multi-embeddings-learning-of-acoustic-models-1412.6645"/></url>
<url><loc>https://scifaro.com/en/abs/musical-elements-in-the-discrete-time-representation-of-sound-1412.6853</loc><lastmod>2017-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-elements-in-the-discrete-time-representation-of-sound-1412.6853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-elements-in-the-discrete-time-representation-of-sound-1412.6853"/></url>
<url><loc>https://scifaro.com/en/abs/audio-source-separation-with-discriminative-scattering-networks-1412.7022</loc><lastmod>2015-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-source-separation-with-discriminative-scattering-networks-1412.7022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-source-separation-with-discriminative-scattering-networks-1412.7022"/></url>
<url><loc>https://scifaro.com/en/abs/audio-source-separation-using-a-deep-autoencoder-1412.7193</loc><lastmod>2014-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-source-separation-using-a-deep-autoencoder-1412.7193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-source-separation-using-a-deep-autoencoder-1412.7193"/></url>
<url><loc>https://scifaro.com/en/abs/listening-to-features-1501.04981</loc><lastmod>2015-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-to-features-1501.04981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-to-features-1501.04981"/></url>
<url><loc>https://scifaro.com/en/abs/implementation-of-an-automatic-syllabic-division-algorithm-from-speech-files-in-portuguese-language-1501.07496</loc><lastmod>2018-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementation-of-an-automatic-syllabic-division-algorithm-from-speech-files-in-portuguese-language-1501.07496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementation-of-an-automatic-syllabic-division-algorithm-from-speech-files-in-portuguese-language-1501.07496"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-classifiers-in-performing-speaker-accent-recognition-using-mfccs-1501.07866</loc><lastmod>2015-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-classifiers-in-performing-speaker-accent-recognition-using-mfccs-1501.07866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-classifiers-in-performing-speaker-accent-recognition-using-mfccs-1501.07866"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-incremental-learning-and-prediction-of-music-signals-1502.00524</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-incremental-learning-and-prediction-of-music-signals-1502.00524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-incremental-learning-and-prediction-of-music-signals-1502.00524"/></url>
<url><loc>https://scifaro.com/en/abs/cs-reconstruction-of-the-speech-and-musical-signals-1502.01707</loc><lastmod>2015-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cs-reconstruction-of-the-speech-and-musical-signals-1502.01707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cs-reconstruction-of-the-speech-and-musical-signals-1502.01707"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-head-related-impulse-response-for-efficient-direct-convolution-1502.03162</loc><lastmod>2015-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-head-related-impulse-response-for-efficient-direct-convolution-1502.03162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-head-related-impulse-response-for-efficient-direct-convolution-1502.03162"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-process-models-for-hrtf-based-sound-source-localization-and-active-learning-1502.03163</loc><lastmod>2015-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-process-models-for-hrtf-based-sound-source-localization-and-active-learning-1502.03163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-process-models-for-hrtf-based-sound-source-localization-and-active-learning-1502.03163"/></url>
<url><loc>https://scifaro.com/en/abs/a-full-frequency-masking-vocoder-for-legal-eavesdropping-conversation-recording-1502.03387</loc><lastmod>2018-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-full-frequency-masking-vocoder-for-legal-eavesdropping-conversation-recording-1502.03387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-full-frequency-masking-vocoder-for-legal-eavesdropping-conversation-recording-1502.03387"/></url>
<url><loc>https://scifaro.com/en/abs/coherent-to-diffuse-power-ratio-estimation-for-dereverberation-1502.03784</loc><lastmod>2015-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coherent-to-diffuse-power-ratio-estimation-for-dereverberation-1502.03784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coherent-to-diffuse-power-ratio-estimation-for-dereverberation-1502.03784"/></url>
<url><loc>https://scifaro.com/en/abs/joint-optimization-of-masks-and-deep-recurrent-neural-networks-for-monaural-source-separation-1502.04149</loc><lastmod>2015-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-optimization-of-masks-and-deep-recurrent-neural-networks-for-monaural-source-separation-1502.04149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-optimization-of-masks-and-deep-recurrent-neural-networks-for-monaural-source-separation-1502.04149"/></url>
<url><loc>https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-based-on-harmonic-plus-noise-model-and-singing-expression-analysis-1502.04300</loc><lastmod>2015-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-based-on-harmonic-plus-noise-model-and-singing-expression-analysis-1502.04300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-based-on-harmonic-plus-noise-model-and-singing-expression-analysis-1502.04300"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-synthesis-of-room-acoustics-via-scattering-delay-networks-1502.05751</loc><lastmod>2015-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-synthesis-of-room-acoustics-via-scattering-delay-networks-1502.05751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-synthesis-of-room-acoustics-via-scattering-delay-networks-1502.05751"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-audio-features-and-statistical-models-exploited-for-voice-pattern-design-1502.06811</loc><lastmod>2015-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-audio-features-and-statistical-models-exploited-for-voice-pattern-design-1502.06811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-audio-features-and-statistical-models-exploited-for-voice-pattern-design-1502.06811"/></url>
<url><loc>https://scifaro.com/en/abs/plagiarism-detection-in-polyphonic-music-using-monaural-signal-separation-1503.00022</loc><lastmod>2016-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plagiarism-detection-in-polyphonic-music-using-monaural-signal-separation-1503.00022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plagiarism-detection-in-polyphonic-music-using-monaural-signal-separation-1503.00022"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-spatial-audio-reproduction-schemes-for-application-in-hearing-aid-research-1503.00586</loc><lastmod>2015-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-spatial-audio-reproduction-schemes-for-application-in-hearing-aid-research-1503.00586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-spatial-audio-reproduction-schemes-for-application-in-hearing-aid-research-1503.00586"/></url>
<url><loc>https://scifaro.com/en/abs/deep-transform-time-domain-audio-error-correction-via-probabilistic-re-synthesis-1503.05849</loc><lastmod>2015-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-transform-time-domain-audio-error-correction-via-probabilistic-re-synthesis-1503.05849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-transform-time-domain-audio-error-correction-via-probabilistic-re-synthesis-1503.05849"/></url>
<url><loc>https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-probabilistic-re-synthesis-1503.06046</loc><lastmod>2015-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-probabilistic-re-synthesis-1503.06046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-probabilistic-re-synthesis-1503.06046"/></url>
<url><loc>https://scifaro.com/en/abs/probabilistic-binary-mask-cocktail-party-source-separation-in-a-convolutional-deep-neural-network-1503.06962</loc><lastmod>2015-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probabilistic-binary-mask-cocktail-party-source-separation-in-a-convolutional-deep-neural-network-1503.06962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probabilistic-binary-mask-cocktail-party-source-separation-in-a-convolutional-deep-neural-network-1503.06962"/></url>
<url><loc>https://scifaro.com/en/abs/online-monaural-speech-enhancement-based-on-periodicity-analysis-and-a-priori-snr-estimation-1503.07015</loc><lastmod>2015-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-monaural-speech-enhancement-based-on-periodicity-analysis-and-a-priori-snr-estimation-1503.07015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-monaural-speech-enhancement-based-on-periodicity-analysis-and-a-priori-snr-estimation-1503.07015"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-event-detection-for-multiple-overlapping-similar-sources-1503.07150</loc><lastmod>2015-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-event-detection-for-multiple-overlapping-similar-sources-1503.07150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-event-detection-for-multiple-overlapping-similar-sources-1503.07150"/></url>
<url><loc>https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-complex-convolution-in-a-deep-neural-network-1504.02945</loc><lastmod>2015-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-complex-convolution-in-a-deep-neural-network-1504.02945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-transform-cocktail-party-source-separation-via-complex-convolution-in-a-deep-neural-network-1504.02945"/></url>
<url><loc>https://scifaro.com/en/abs/absolute-geometry-calibration-of-distributed-microphone-arrays-in-an-audio-visual-sensor-network-1504.03128</loc><lastmod>2015-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/absolute-geometry-calibration-of-distributed-microphone-arrays-in-an-audio-visual-sensor-network-1504.03128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/absolute-geometry-calibration-of-distributed-microphone-arrays-in-an-audio-visual-sensor-network-1504.03128"/></url>
<url><loc>https://scifaro.com/en/abs/deep-karaoke-extracting-vocals-from-musical-mixtures-using-a-convolutional-deep-neural-network-1504.04658</loc><lastmod>2015-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-karaoke-extracting-vocals-from-musical-mixtures-using-a-convolutional-deep-neural-network-1504.04658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-karaoke-extracting-vocals-from-musical-mixtures-using-a-convolutional-deep-neural-network-1504.04658"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-trade-offs-for-audio-source-separation-with-binary-masks-1504.07372</loc><lastmod>2015-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-trade-offs-for-audio-source-separation-with-binary-masks-1504.07372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-trade-offs-for-audio-source-separation-with-binary-masks-1504.07372"/></url>
<url><loc>https://scifaro.com/en/abs/who-spoke-what-a-latent-variable-framework-for-the-joint-decoding-of-multiple-speakers-and-their-keywords-1504.08021</loc><lastmod>2015-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-spoke-what-a-latent-variable-framework-for-the-joint-decoding-of-multiple-speakers-and-their-keywords-1504.08021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-spoke-what-a-latent-variable-framework-for-the-joint-decoding-of-multiple-speakers-and-their-keywords-1504.08021"/></url>
<url><loc>https://scifaro.com/en/abs/noise-sensitivity-of-teager-kaiser-energy-operators-and-their-ratios-1504.08177</loc><lastmod>2015-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-sensitivity-of-teager-kaiser-energy-operators-and-their-ratios-1504.08177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-sensitivity-of-teager-kaiser-energy-operators-and-their-ratios-1504.08177"/></url>
<url><loc>https://scifaro.com/en/abs/deep-remix-remixing-musical-mixtures-using-a-convolutional-deep-neural-network-1505.00289</loc><lastmod>2015-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-remix-remixing-musical-mixtures-using-a-convolutional-deep-neural-network-1505.00289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-remix-remixing-musical-mixtures-using-a-convolutional-deep-neural-network-1505.00289"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-parameterization-of-the-room-transfer-function-1505.04385</loc><lastmod>2015-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-parameterization-of-the-room-transfer-function-1505.04385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-parameterization-of-the-room-transfer-function-1505.04385"/></url>
<url><loc>https://scifaro.com/en/abs/sparsity-and-cosparsity-for-audio-declipping-a-flexible-non-convex-approach-1506.01830</loc><lastmod>2015-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsity-and-cosparsity-for-audio-declipping-a-flexible-non-convex-approach-1506.01830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsity-and-cosparsity-for-audio-declipping-a-flexible-non-convex-approach-1506.01830"/></url>
<url><loc>https://scifaro.com/en/abs/hybridized-feature-extraction-and-acoustic-modelling-approach-for-dysarthric-speech-recognition-1506.02170</loc><lastmod>2015-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybridized-feature-extraction-and-acoustic-modelling-approach-for-dysarthric-speech-recognition-1506.02170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybridized-feature-extraction-and-acoustic-modelling-approach-for-dysarthric-speech-recognition-1506.02170"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-coherent-to-diffuse-ratio-estimation-for-dereverberation-using-an-itd-model-1506.03604</loc><lastmod>2015-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-coherent-to-diffuse-ratio-estimation-for-dereverberation-using-an-itd-model-1506.03604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-coherent-to-diffuse-ratio-estimation-for-dereverberation-using-an-itd-model-1506.03604"/></url>
<url><loc>https://scifaro.com/en/abs/channel-interaction-and-current-level-affect-across-electrode-integration-of-interaural-time-differences-in-bilateral-cochlear-implant-listeners-1506.03701</loc><lastmod>2015-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-interaction-and-current-level-affect-across-electrode-integration-of-interaural-time-differences-in-bilateral-cochlear-implant-listeners-1506.03701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-interaction-and-current-level-affect-across-electrode-integration-of-interaural-time-differences-in-bilateral-cochlear-implant-listeners-1506.03701"/></url>
<url><loc>https://scifaro.com/en/abs/deep-denoising-auto-encoder-for-statistical-speech-synthesis-1506.05268</loc><lastmod>2015-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-denoising-auto-encoder-for-statistical-speech-synthesis-1506.05268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-denoising-auto-encoder-for-statistical-speech-synthesis-1506.05268"/></url>
<url><loc>https://scifaro.com/en/abs/detection-and-analysis-of-emotion-from-speech-signals-1506.06832</loc><lastmod>2015-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-and-analysis-of-emotion-from-speech-signals-1506.06832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-and-analysis-of-emotion-from-speech-signals-1506.06832"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-generalization-of-relative-transfer-functions-to-more-than-one-source-1507.00201</loc><lastmod>2015-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-generalization-of-relative-transfer-functions-to-more-than-one-source-1507.00201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-generalization-of-relative-transfer-functions-to-more-than-one-source-1507.00201"/></url>
<url><loc>https://scifaro.com/en/abs/cover-song-identification-with-timbral-shape-sequences-1507.05143</loc><lastmod>2015-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cover-song-identification-with-timbral-shape-sequences-1507.05143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cover-song-identification-with-timbral-shape-sequences-1507.05143"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-identification-of-animal-breeds-and-species-using-bioacoustics-and-artificial-neural-networks-1507.05546</loc><lastmod>2015-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-identification-of-animal-breeds-and-species-using-bioacoustics-and-artificial-neural-networks-1507.05546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-identification-of-animal-breeds-and-species-using-bioacoustics-and-artificial-neural-networks-1507.05546"/></url>
<url><loc>https://scifaro.com/en/abs/the-sysu-system-for-the-interspeech-2015-automatic-speaker-verification-spoofing-and-countermeasures-challenge-1507.06711</loc><lastmod>2015-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sysu-system-for-the-interspeech-2015-automatic-speaker-verification-spoofing-and-countermeasures-challenge-1507.06711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sysu-system-for-the-interspeech-2015-automatic-speaker-verification-spoofing-and-countermeasures-challenge-1507.06711"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-for-the-temporal-evolution-of-the-spatial-coherence-in-decaying-reverberant-sound-fields-1507.07348</loc><lastmod>2015-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-for-the-temporal-evolution-of-the-spatial-coherence-in-decaying-reverberant-sound-fields-1507.07348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-for-the-temporal-evolution-of-the-spatial-coherence-in-decaying-reverberant-sound-fields-1507.07348"/></url>
<url><loc>https://scifaro.com/en/abs/stc-anti-spoofing-systems-for-the-asvspoof-2015-challenge-1507.08074</loc><lastmod>2015-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stc-anti-spoofing-systems-for-the-asvspoof-2015-challenge-1507.08074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stc-anti-spoofing-systems-for-the-asvspoof-2015-challenge-1507.08074"/></url>
<url><loc>https://scifaro.com/en/abs/significance-of-maximum-spectral-amplitude-in-sub-bands-for-spectral-envelope-estimation-and-its-application-to-statistical-parametric-speech-synthesis-1508.00354</loc><lastmod>2015-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/significance-of-maximum-spectral-amplitude-in-sub-bands-for-spectral-envelope-estimation-and-its-application-to-statistical-parametric-speech-synthesis-1508.00354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/significance-of-maximum-spectral-amplitude-in-sub-bands-for-spectral-envelope-estimation-and-its-application-to-statistical-parametric-speech-synthesis-1508.00354"/></url>
<url><loc>https://scifaro.com/en/abs/using-deep-learning-for-detecting-spoofing-attacks-on-speech-signals-1508.01746</loc><lastmod>2016-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-deep-learning-for-detecting-spoofing-attacks-on-speech-signals-1508.01746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-deep-learning-for-detecting-spoofing-attacks-on-speech-signals-1508.01746"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-sound-source-localization-based-on-manifold-regularization-1508.03148</loc><lastmod>2015-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-sound-source-localization-based-on-manifold-regularization-1508.03148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-sound-source-localization-based-on-manifold-regularization-1508.03148"/></url>
<url><loc>https://scifaro.com/en/abs/histogram-of-gradients-of-time-frequency-representations-for-audio-scene-detection-1508.04909</loc><lastmod>2015-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/histogram-of-gradients-of-time-frequency-representations-for-audio-scene-detection-1508.04909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/histogram-of-gradients-of-time-frequency-representations-for-audio-scene-detection-1508.04909"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-reconfigurable-hardware-design-for-speech-enhancement-based-on-multi-band-spectral-subtraction-involving-magnitude-and-phase-components-1508.06056</loc><lastmod>2015-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-reconfigurable-hardware-design-for-speech-enhancement-based-on-multi-band-spectral-subtraction-involving-magnitude-and-phase-components-1508.06056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-reconfigurable-hardware-design-for-speech-enhancement-based-on-multi-band-spectral-subtraction-involving-magnitude-and-phase-components-1508.06056"/></url>
<url><loc>https://scifaro.com/en/abs/transposition-of-notations-in-just-intonation-1508.07739</loc><lastmod>2016-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transposition-of-notations-in-just-intonation-1508.07739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transposition-of-notations-in-just-intonation-1508.07739"/></url>
<url><loc>https://scifaro.com/en/abs/transform-ee-en-scattering-sur-la-spirale-temps-chroma-octave-1509.00334</loc><lastmod>2015-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transform-ee-en-scattering-sur-la-spirale-temps-chroma-octave-1509.00334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transform-ee-en-scattering-sur-la-spirale-temps-chroma-octave-1509.00334"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-and-recognition-of-reverberant-and-noisy-speech-by-extending-its-coherence-1509.00533</loc><lastmod>2015-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-and-recognition-of-reverberant-and-noisy-speech-by-extending-its-coherence-1509.00533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-and-recognition-of-reverberant-and-noisy-speech-by-extending-its-coherence-1509.00533"/></url>
<url><loc>https://scifaro.com/en/abs/source-localization-and-denoising-a-perspective-from-the-tdoa-space-1509.02380</loc><lastmod>2016-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-localization-and-denoising-a-perspective-from-the-tdoa-space-1509.02380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-localization-and-denoising-a-perspective-from-the-tdoa-space-1509.02380"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-the-direct-path-relative-transfer-function-for-supervised-sound-source-localization-1509.03205</loc><lastmod>2016-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-the-direct-path-relative-transfer-function-for-supervised-sound-source-localization-1509.03205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-the-direct-path-relative-transfer-function-for-supervised-sound-source-localization-1509.03205"/></url>
<url><loc>https://scifaro.com/en/abs/background-tracking-acoustic-features-for-genre-identification-of-broadcast-shows-1509.04934</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/background-tracking-acoustic-features-for-genre-identification-of-broadcast-shows-1509.04934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/background-tracking-acoustic-features-for-genre-identification-of-broadcast-shows-1509.04934"/></url>
<url><loc>https://scifaro.com/en/abs/melodic-contour-and-mid-level-global-features-applied-to-the-analysis-of-flamenco-cantes-1509.04956</loc><lastmod>2015-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melodic-contour-and-mid-level-global-features-applied-to-the-analysis-of-flamenco-cantes-1509.04956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melodic-contour-and-mid-level-global-features-applied-to-the-analysis-of-flamenco-cantes-1509.04956"/></url>
<url><loc>https://scifaro.com/en/abs/post-processing-speech-recordings-during-mri-1509.05254</loc><lastmod>2016-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/post-processing-speech-recordings-during-mri-1509.05254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/post-processing-speech-recordings-during-mri-1509.05254"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-ioa-cas-speech-separation-and-recognition-system-for-the-third-chime-challenge-1509.06103</loc><lastmod>2015-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-ioa-cas-speech-separation-and-recognition-system-for-the-third-chime-challenge-1509.06103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-ioa-cas-speech-separation-and-recognition-system-for-the-third-chime-challenge-1509.06103"/></url>
<url><loc>https://scifaro.com/en/abs/sports-highlights-generation-based-on-acoustic-events-detection-a-rugby-case-study-1509.06279</loc><lastmod>2015-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sports-highlights-generation-based-on-acoustic-events-detection-a-rugby-case-study-1509.06279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sports-highlights-generation-based-on-acoustic-events-detection-a-rugby-case-study-1509.06279"/></url>
<url><loc>https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-distant-speech-recognition-1509.06882</loc><lastmod>2015-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-distant-speech-recognition-1509.06882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-distant-speech-recognition-1509.06882"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-asr-for-the-third-chime-challenge-exploiting-time-frequency-masking-based-multi-channel-speech-enhancement-and-recurrent-neural-network-1509.07211</loc><lastmod>2015-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-asr-for-the-third-chime-challenge-exploiting-time-frequency-masking-based-multi-channel-speech-enhancement-and-recurrent-neural-network-1509.07211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-asr-for-the-third-chime-challenge-exploiting-time-frequency-masking-based-multi-channel-speech-enhancement-and-recurrent-neural-network-1509.07211"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-universal-background-sparse-coding-based-speaker-verification-on-timit-1509.07298</loc><lastmod>2017-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-universal-background-sparse-coding-based-speaker-verification-on-timit-1509.07298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-universal-background-sparse-coding-based-speaker-verification-on-timit-1509.07298"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-in-the-stft-domain-1509.07411</loc><lastmod>2015-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-in-the-stft-domain-1509.07411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-in-the-stft-domain-1509.07411"/></url>
<url><loc>https://scifaro.com/en/abs/a-dedicated-greedy-pursuit-algorithm-for-sparse-spectral-representation-of-music-sound-1509.07659</loc><lastmod>2016-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dedicated-greedy-pursuit-algorithm-for-sparse-spectral-representation-of-music-sound-1509.07659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dedicated-greedy-pursuit-algorithm-for-sparse-spectral-representation-of-music-sound-1509.07659"/></url>
<url><loc>https://scifaro.com/en/abs/processing-of-acoustical-signals-via-a-wavelet-based-analysis-1509.09113</loc><lastmod>2015-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/processing-of-acoustical-signals-via-a-wavelet-based-analysis-1509.09113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/processing-of-acoustical-signals-via-a-wavelet-based-analysis-1509.09113"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-integration-for-blind-and-non-blind-reverberation-time-estimation-1510.00266</loc><lastmod>2015-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-integration-for-blind-and-non-blind-reverberation-time-estimation-1510.00266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-integration-for-blind-and-non-blind-reverberation-time-estimation-1510.00266"/></url>
<url><loc>https://scifaro.com/en/abs/the-icstm-tum-up-approach-to-the-3rd-chime-challenge-single-channel-lstm-speech-enhancement-with-multi-channel-correlation-shaping-dereverberation-and-lstm-language-models-1510.00268</loc><lastmod>2015-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-icstm-tum-up-approach-to-the-3rd-chime-challenge-single-channel-lstm-speech-enhancement-with-multi-channel-correlation-shaping-dereverberation-and-lstm-language-models-1510.00268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-icstm-tum-up-approach-to-the-3rd-chime-challenge-single-channel-lstm-speech-enhancement-with-multi-channel-correlation-shaping-dereverberation-and-lstm-language-models-1510.00268"/></url>
<url><loc>https://scifaro.com/en/abs/proceedings-of-the-ace-challenge-workshop-a-satellite-event-of-ieee-waspaa-2015-1510.00383</loc><lastmod>2015-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proceedings-of-the-ace-challenge-workshop-a-satellite-event-of-ieee-waspaa-2015-1510.00383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proceedings-of-the-ace-challenge-workshop-a-satellite-event-of-ieee-waspaa-2015-1510.00383"/></url>
<url><loc>https://scifaro.com/en/abs/reverberation-time-estimation-on-the-ace-corpus-using-the-sdd-method-1510.01193</loc><lastmod>2015-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberation-time-estimation-on-the-ace-corpus-using-the-sdd-method-1510.01193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberation-time-estimation-on-the-ace-corpus-using-the-sdd-method-1510.01193"/></url>
<url><loc>https://scifaro.com/en/abs/a-waveform-representation-framework-for-high-quality-statistical-parametric-speech-synthesis-1510.01443</loc><lastmod>2015-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-waveform-representation-framework-for-high-quality-statistical-parametric-speech-synthesis-1510.01443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-waveform-representation-framework-for-high-quality-statistical-parametric-speech-synthesis-1510.01443"/></url>
<url><loc>https://scifaro.com/en/abs/music-viewed-by-its-entropy-content-a-novel-window-for-comparative-analysis-1510.01806</loc><lastmod>2017-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-viewed-by-its-entropy-content-a-novel-window-for-comparative-analysis-1510.01806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-viewed-by-its-entropy-content-a-novel-window-for-comparative-analysis-1510.01806"/></url>
<url><loc>https://scifaro.com/en/abs/a-language-model-based-approach-towards-large-scale-and-lightweight-language-identification-systems-1510.03602</loc><lastmod>2016-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-language-model-based-approach-towards-large-scale-and-lightweight-language-identification-systems-1510.03602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-language-model-based-approach-towards-large-scale-and-lightweight-language-identification-systems-1510.03602"/></url>
<url><loc>https://scifaro.com/en/abs/corpus-cofla-a-research-corpus-for-the-computational-study-of-flamenco-music-1510.04029</loc><lastmod>2015-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/corpus-cofla-a-research-corpus-for-the-computational-study-of-flamenco-music-1510.04029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/corpus-cofla-a-research-corpus-for-the-computational-study-of-flamenco-music-1510.04029"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-transcription-of-flamenco-singing-from-polyphonic-music-recordings-1510.04039</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-transcription-of-flamenco-singing-from-polyphonic-music-recordings-1510.04039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-transcription-of-flamenco-singing-from-polyphonic-music-recordings-1510.04039"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-one-to-many-problem-in-voice-conversion-by-equalizing-the-formant-locations-using-dynamic-frequency-warping-1510.04205</loc><lastmod>2015-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-one-to-many-problem-in-voice-conversion-by-equalizing-the-formant-locations-using-dynamic-frequency-warping-1510.04205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-one-to-many-problem-in-voice-conversion-by-equalizing-the-formant-locations-using-dynamic-frequency-warping-1510.04205"/></url>
<url><loc>https://scifaro.com/en/abs/a-variational-em-algorithm-for-the-separation-of-time-varying-convolutive-audio-mixtures-1510.04595</loc><lastmod>2016-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variational-em-algorithm-for-the-separation-of-time-varying-convolutive-audio-mixtures-1510.04595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variational-em-algorithm-for-the-separation-of-time-varying-convolutive-audio-mixtures-1510.04595"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-the-non-intrusive-room-acoustics-algorithm-with-the-ace-challenge-1510.04616</loc><lastmod>2015-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-the-non-intrusive-room-acoustics-algorithm-with-the-ace-challenge-1510.04616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-the-non-intrusive-room-acoustics-algorithm-with-the-ace-challenge-1510.04616"/></url>
<url><loc>https://scifaro.com/en/abs/joint-estimation-of-reverberation-time-and-direct-to-reverberation-ratio-from-speech-using-auditory-inspired-features-1510.04620</loc><lastmod>2015-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-estimation-of-reverberation-time-and-direct-to-reverberation-ratio-from-speech-using-auditory-inspired-features-1510.04620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-estimation-of-reverberation-time-and-direct-to-reverberation-ratio-from-speech-using-auditory-inspired-features-1510.04620"/></url>
<url><loc>https://scifaro.com/en/abs/srmr-variants-for-improved-blind-room-acoustics-characterization-1510.04707</loc><lastmod>2015-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/srmr-variants-for-improved-blind-room-acoustics-characterization-1510.04707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/srmr-variants-for-improved-blind-room-acoustics-characterization-1510.04707"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-and-timbre-analysis-of-tabla-strokes-1510.04880</loc><lastmod>2015-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-and-timbre-analysis-of-tabla-strokes-1510.04880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-and-timbre-analysis-of-tabla-strokes-1510.04880"/></url>
<url><loc>https://scifaro.com/en/abs/binary-speaker-embedding-1510.05937</loc><lastmod>2016-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binary-speaker-embedding-1510.05937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binary-speaker-embedding-1510.05937"/></url>
<url><loc>https://scifaro.com/en/abs/max-margin-metric-learning-for-speaker-recognition-1510.05940</loc><lastmod>2016-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/max-margin-metric-learning-for-speaker-recognition-1510.05940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/max-margin-metric-learning-for-speaker-recognition-1510.05940"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-approach-for-speech-enhancement-using-mog-model-and-neural-network-phoneme-classifier-1510.07315</loc><lastmod>2015-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-approach-for-speech-enhancement-using-mog-model-and-neural-network-phoneme-classifier-1510.07315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-approach-for-speech-enhancement-using-mog-model-and-neural-network-phoneme-classifier-1510.07315"/></url>
<url><loc>https://scifaro.com/en/abs/direct-to-reverberant-ratio-estimation-on-the-ace-corpus-using-a-two-channel-beamformer-1510.07546</loc><lastmod>2015-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direct-to-reverberant-ratio-estimation-on-the-ace-corpus-using-a-two-channel-beamformer-1510.07546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direct-to-reverberant-ratio-estimation-on-the-ace-corpus-using-a-two-channel-beamformer-1510.07546"/></url>
<url><loc>https://scifaro.com/en/abs/a-dictionary-learning-and-source-recovery-based-approach-to-classify-diverse-audio-sources-1510.07774</loc><lastmod>2015-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dictionary-learning-and-source-recovery-based-approach-to-classify-diverse-audio-sources-1510.07774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dictionary-learning-and-source-recovery-based-approach-to-classify-diverse-audio-sources-1510.07774"/></url>
<url><loc>https://scifaro.com/en/abs/musan-a-music-speech-and-noise-corpus-1510.08484</loc><lastmod>2015-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musan-a-music-speech-and-noise-corpus-1510.08484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musan-a-music-speech-and-noise-corpus-1510.08484"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-the-direct-to-reverberant-energy-ratio-using-a-spherical-microphone-array-1510.08950</loc><lastmod>2015-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-the-direct-to-reverberant-energy-ratio-using-a-spherical-microphone-array-1510.08950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-the-direct-to-reverberant-energy-ratio-using-a-spherical-microphone-array-1510.08950"/></url>
<url><loc>https://scifaro.com/en/abs/psd-estimation-in-beamspace-for-estimating-direct-to-reverberant-ratio-from-a-reverberant-speech-signal-1510.08963</loc><lastmod>2015-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psd-estimation-in-beamspace-for-estimating-direct-to-reverberant-ratio-from-a-reverberant-speech-signal-1510.08963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psd-estimation-in-beamspace-for-estimating-direct-to-reverberant-ratio-from-a-reverberant-speech-signal-1510.08963"/></url>
<url><loc>https://scifaro.com/en/abs/sparsity-based-algorithm-for-detecting-faults-in-rotating-machines-1511.00067</loc><lastmod>2016-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsity-based-algorithm-for-detecting-faults-in-rotating-machines-1511.00067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsity-based-algorithm-for-detecting-faults-in-rotating-machines-1511.00067"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-faults-in-rotating-machinery-using-periodic-time-frequency-sparsity-1511.00393</loc><lastmod>2016-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-faults-in-rotating-machinery-using-periodic-time-frequency-sparsity-1511.00393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-faults-in-rotating-machinery-using-periodic-time-frequency-sparsity-1511.00393"/></url>
<url><loc>https://scifaro.com/en/abs/fault-diagnosis-of-rolling-element-bearings-with-a-spectrum-searching-method-1511.03174</loc><lastmod>2016-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fault-diagnosis-of-rolling-element-bearings-with-a-spectrum-searching-method-1511.03174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fault-diagnosis-of-rolling-element-bearings-with-a-spectrum-searching-method-1511.03174"/></url>
<url><loc>https://scifaro.com/en/abs/combination-of-binaural-and-harmonic-masking-release-effects-in-the-detection-of-a-single-component-in-complex-tones-1511.03440</loc><lastmod>2017-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combination-of-binaural-and-harmonic-masking-release-effects-in-the-detection-of-a-single-component-in-complex-tones-1511.03440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combination-of-binaural-and-harmonic-masking-release-effects-in-the-detection-of-a-single-component-in-complex-tones-1511.03440"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-maximum-likelihood-t60-estimation-exploiting-subband-information-1511.04063</loc><lastmod>2015-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-maximum-likelihood-t60-estimation-exploiting-subband-information-1511.04063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-maximum-likelihood-t60-estimation-exploiting-subband-information-1511.04063"/></url>
<url><loc>https://scifaro.com/en/abs/quality-assessment-of-voice-converted-speech-using-articulatory-features-1511.04867</loc><lastmod>2015-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quality-assessment-of-voice-converted-speech-using-articulatory-features-1511.04867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quality-assessment-of-voice-converted-speech-using-articulatory-features-1511.04867"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-instrument-recognition-in-polyphonic-music-using-convolutional-neural-networks-1511.05520</loc><lastmod>2015-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-instrument-recognition-in-polyphonic-music-using-convolutional-neural-networks-1511.05520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-instrument-recognition-in-polyphonic-music-using-convolutional-neural-networks-1511.05520"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-vowel-tremolo-detection-using-low-level-audio-descriptors-1511.07008</loc><lastmod>2015-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-vowel-tremolo-detection-using-low-level-audio-descriptors-1511.07008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-vowel-tremolo-detection-using-low-level-audio-descriptors-1511.07008"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-voice-conversion-using-prosodic-and-high-resolution-spectral-features-1512.01809</loc><lastmod>2015-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-voice-conversion-using-prosodic-and-high-resolution-spectral-features-1512.01809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-voice-conversion-using-prosodic-and-high-resolution-spectral-features-1512.01809"/></url>
<url><loc>https://scifaro.com/en/abs/joint-time-frequency-scattering-for-audio-classification-1512.02125</loc><lastmod>2018-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-time-frequency-scattering-for-audio-classification-1512.02125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-time-frequency-scattering-for-audio-classification-1512.02125"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-single-and-multi-session-i-vector-speaker-recognition-1512.02560</loc><lastmod>2017-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-single-and-multi-session-i-vector-speaker-recognition-1512.02560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-single-and-multi-session-i-vector-speaker-recognition-1512.02560"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-a-geometrically-sampled-grid-in-the-srp-phat-for-localization-improvement-and-power-response-sensitivity-analysis-1512.03261</loc><lastmod>2018-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-a-geometrically-sampled-grid-in-the-srp-phat-for-localization-improvement-and-power-response-sensitivity-analysis-1512.03261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-a-geometrically-sampled-grid-in-the-srp-phat-for-localization-improvement-and-power-response-sensitivity-analysis-1512.03261"/></url>
<url><loc>https://scifaro.com/en/abs/trigonometric-dictionary-based-codec-for-music-compression-with-high-quality-recovery-1512.04243</loc><lastmod>2015-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trigonometric-dictionary-based-codec-for-music-compression-with-high-quality-recovery-1512.04243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trigonometric-dictionary-based-codec-for-music-compression-with-high-quality-recovery-1512.04243"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-study-of-the-vocal-tract-in-vowel-synthesis-a-comparison-between-1d-and-3d-acoustic-analysis-1512.05811</loc><lastmod>2015-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-study-of-the-vocal-tract-in-vowel-synthesis-a-comparison-between-1d-and-3d-acoustic-analysis-1512.05811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-study-of-the-vocal-tract-in-vowel-synthesis-a-comparison-between-1d-and-3d-acoustic-analysis-1512.05811"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-robust-adaptive-algorithm-for-underwater-acoustic-channel-equalization-1512.06222</loc><lastmod>2015-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-robust-adaptive-algorithm-for-underwater-acoustic-channel-equalization-1512.06222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-robust-adaptive-algorithm-for-underwater-acoustic-channel-equalization-1512.06222"/></url>
<url><loc>https://scifaro.com/en/abs/musical-instrument-sound-classification-with-deep-convolutional-neural-network-using-feature-fusion-approach-1512.07370</loc><lastmod>2015-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-instrument-sound-classification-with-deep-convolutional-neural-network-using-feature-fusion-approach-1512.07370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-instrument-sound-classification-with-deep-convolutional-neural-network-using-feature-fusion-approach-1512.07370"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-audio-to-score-alignment-of-music-performances-containing-errors-and-arbitrary-repeats-and-skips-1512.07748</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-audio-to-score-alignment-of-music-performances-containing-errors-and-arbitrary-repeats-and-skips-1512.07748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-audio-to-score-alignment-of-music-performances-containing-errors-and-arbitrary-repeats-and-skips-1512.07748"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-audio-signal-source-separation-based-on-an-interchannel-loudness-vector-sum-1512.08075</loc><lastmod>2015-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-audio-signal-source-separation-based-on-an-interchannel-loudness-vector-sum-1512.08075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-audio-signal-source-separation-based-on-an-interchannel-loudness-vector-sum-1512.08075"/></url>
<url><loc>https://scifaro.com/en/abs/technical-report-a-tool-for-measuring-prosodic-accommodation-1512.08982</loc><lastmod>2017-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/technical-report-a-tool-for-measuring-prosodic-accommodation-1512.08982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/technical-report-a-tool-for-measuring-prosodic-accommodation-1512.08982"/></url>
<url><loc>https://scifaro.com/en/abs/wavelet-scattering-on-the-pitch-spiral-1601.00287</loc><lastmod>2016-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavelet-scattering-on-the-pitch-spiral-1601.00287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavelet-scattering-on-the-pitch-spiral-1601.00287"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-rhythmic-staccato-vocalization-based-on-frequency-demodulation-for-laughter-detection-in-conversational-meetings-1601.00833</loc><lastmod>2016-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-rhythmic-staccato-vocalization-based-on-frequency-demodulation-for-laughter-detection-in-conversational-meetings-1601.00833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-rhythmic-staccato-vocalization-based-on-frequency-demodulation-for-laughter-detection-in-conversational-meetings-1601.00833"/></url>
<url><loc>https://scifaro.com/en/abs/gender-identification-using-mfcc-for-telephone-applications-a-comparative-study-1601.01577</loc><lastmod>2016-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-identification-using-mfcc-for-telephone-applications-a-comparative-study-1601.01577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-identification-using-mfcc-for-telephone-applications-a-comparative-study-1601.01577"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-transposition-of-melodic-sequences-on-digital-devices-1601.02069</loc><lastmod>2016-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-transposition-of-melodic-sequences-on-digital-devices-1601.02069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-transposition-of-melodic-sequences-on-digital-devices-1601.02069"/></url>
<url><loc>https://scifaro.com/en/abs/wavelet-speech-enhancement-based-on-nonnegative-matrix-factorization-1601.02309</loc><lastmod>2016-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavelet-speech-enhancement-based-on-nonnegative-matrix-factorization-1601.02309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavelet-speech-enhancement-based-on-nonnegative-matrix-factorization-1601.02309"/></url>
<url><loc>https://scifaro.com/en/abs/repetitive-transients-extraction-algorithm-for-detecting-bearing-faults-1601.02339</loc><lastmod>2016-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repetitive-transients-extraction-algorithm-for-detecting-bearing-faults-1601.02339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repetitive-transients-extraction-algorithm-for-detecting-bearing-faults-1601.02339"/></url>
<url><loc>https://scifaro.com/en/abs/categorization-of-tablas-by-wavelet-analysis-1601.02489</loc><lastmod>2016-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/categorization-of-tablas-by-wavelet-analysis-1601.02489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/categorization-of-tablas-by-wavelet-analysis-1601.02489"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-determination-of-chord-roots-1601.02546</loc><lastmod>2016-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-determination-of-chord-roots-1601.02546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-determination-of-chord-roots-1601.02546"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-frame-based-nonlinear-prediction-system-for-automatic-speech-coding-1601.06008</loc><lastmod>2016-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-frame-based-nonlinear-prediction-system-for-automatic-speech-coding-1601.06008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-frame-based-nonlinear-prediction-system-for-automatic-speech-coding-1601.06008"/></url>
<url><loc>https://scifaro.com/en/abs/a-perceptually-motivated-filter-bank-with-perfect-reconstruction-for-audio-signal-processing-1601.06652</loc><lastmod>2016-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-perceptually-motivated-filter-bank-with-perfect-reconstruction-for-audio-signal-processing-1601.06652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-perceptually-motivated-filter-bank-with-perfect-reconstruction-for-audio-signal-processing-1601.06652"/></url>
<url><loc>https://scifaro.com/en/abs/categorization-of-stringed-instruments-with-multifractal-detrended-fluctuation-analysis-1601.07709</loc><lastmod>2016-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/categorization-of-stringed-instruments-with-multifractal-detrended-fluctuation-analysis-1601.07709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/categorization-of-stringed-instruments-with-multifractal-detrended-fluctuation-analysis-1601.07709"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-topological-fingerprint-of-music-1602.00739</loc><lastmod>2016-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-topological-fingerprint-of-music-1602.00739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-topological-fingerprint-of-music-1602.00739"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-deep-neural-networks-postfiltering-for-improving-the-quality-of-synthetic-voices-1602.02656</loc><lastmod>2016-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-deep-neural-networks-postfiltering-for-improving-the-quality-of-synthetic-voices-1602.02656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-deep-neural-networks-postfiltering-for-improving-the-quality-of-synthetic-voices-1602.02656"/></url>
<url><loc>https://scifaro.com/en/abs/a-high-quality-speech-and-audio-codec-with-less-than-10-ms-delay-1602.05526</loc><lastmod>2016-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-high-quality-speech-and-audio-codec-with-less-than-10-ms-delay-1602.05526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-high-quality-speech-and-audio-codec-with-less-than-10-ms-delay-1602.05526"/></url>
<url><loc>https://scifaro.com/en/abs/audio-recording-device-identification-based-on-deep-learning-1602.05682</loc><lastmod>2016-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-recording-device-identification-based-on-deep-learning-1602.05682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-recording-device-identification-based-on-deep-learning-1602.05682"/></url>
<url><loc>https://scifaro.com/en/abs/eeg-informed-attended-speaker-extraction-from-recorded-speech-mixtures-with-application-in-neuro-steered-hearing-prostheses-1602.05702</loc><lastmod>2019-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg-informed-attended-speaker-extraction-from-recorded-speech-mixtures-with-application-in-neuro-steered-hearing-prostheses-1602.05702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg-informed-attended-speaker-extraction-from-recorded-speech-mixtures-with-application-in-neuro-steered-hearing-prostheses-1602.05702"/></url>
<url><loc>https://scifaro.com/en/abs/an-iterative-linearised-solution-to-the-sinusoidal-parameter-estimation-problem-1602.05900</loc><lastmod>2016-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-iterative-linearised-solution-to-the-sinusoidal-parameter-estimation-problem-1602.05900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-iterative-linearised-solution-to-the-sinusoidal-parameter-estimation-problem-1602.05900"/></url>
<url><loc>https://scifaro.com/en/abs/near-field-signal-acquisition-for-smartglasses-using-two-acoustic-vector-sensors-1602.06582</loc><lastmod>2016-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/near-field-signal-acquisition-for-smartglasses-using-two-acoustic-vector-sensors-1602.06582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/near-field-signal-acquisition-for-smartglasses-using-two-acoustic-vector-sensors-1602.06582"/></url>
<url><loc>https://scifaro.com/en/abs/improving-trajectory-modelling-for-dnn-based-speech-synthesis-by-using-stacked-bottleneck-features-and-minimum-generation-error-training-1602.06727</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-trajectory-modelling-for-dnn-based-speech-synthesis-by-using-stacked-bottleneck-features-and-minimum-generation-error-training-1602.06727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-trajectory-modelling-for-dnn-based-speech-synthesis-by-using-stacked-bottleneck-features-and-minimum-generation-error-training-1602.06727"/></url>
<url><loc>https://scifaro.com/en/abs/the-ibm-2016-speaker-recognition-system-1602.07291</loc><lastmod>2016-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ibm-2016-speaker-recognition-system-1602.07291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ibm-2016-speaker-recognition-system-1602.07291"/></url>
<url><loc>https://scifaro.com/en/abs/improved-accent-classification-combining-phonetic-vowels-with-acoustic-features-1602.07394</loc><lastmod>2016-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-accent-classification-combining-phonetic-vowels-with-acoustic-features-1602.07394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-accent-classification-combining-phonetic-vowels-with-acoustic-features-1602.07394"/></url>
<url><loc>https://scifaro.com/en/abs/breath-activity-detection-algorithm-1602.07767</loc><lastmod>2016-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breath-activity-detection-algorithm-1602.07767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breath-activity-detection-algorithm-1602.07767"/></url>
<url><loc>https://scifaro.com/en/abs/on-adjusting-the-learning-rate-in-frequency-domain-echo-cancellation-with-double-talk-1602.08044</loc><lastmod>2016-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-adjusting-the-learning-rate-in-frequency-domain-echo-cancellation-with-double-talk-1602.08044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-adjusting-the-learning-rate-in-frequency-domain-echo-cancellation-with-double-talk-1602.08044"/></url>
<url><loc>https://scifaro.com/en/abs/pca-lda-approach-for-text-independent-speaker-recognition-1602.08045</loc><lastmod>2016-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pca-lda-approach-for-text-independent-speaker-recognition-1602.08045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pca-lda-approach-for-text-independent-speaker-recognition-1602.08045"/></url>
<url><loc>https://scifaro.com/en/abs/pca-method-for-automated-detection-of-mispronounced-words-1602.08128</loc><lastmod>2016-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pca-method-for-automated-detection-of-mispronounced-words-1602.08128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pca-method-for-automated-detection-of-mispronounced-words-1602.08128"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-frequency-cepstral-coefficients-for-word-mispronunciation-detection-1602.08132</loc><lastmod>2016-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-frequency-cepstral-coefficients-for-word-mispronunciation-detection-1602.08132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-frequency-cepstral-coefficients-for-word-mispronunciation-detection-1602.08132"/></url>
<url><loc>https://scifaro.com/en/abs/extension-spectrale-d-un-signal-de-parole-de-la-bande-t-el-ephonique-a-la-bande-am-1602.08185</loc><lastmod>2016-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extension-spectrale-d-un-signal-de-parole-de-la-bande-t-el-ephonique-a-la-bande-am-1602.08185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extension-spectrale-d-un-signal-de-parole-de-la-bande-t-el-ephonique-a-la-bande-am-1602.08185"/></url>
<url><loc>https://scifaro.com/en/abs/bandwidth-extension-of-narrowband-speech-for-low-bit-rate-wideband-coding-1602.08215</loc><lastmod>2016-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandwidth-extension-of-narrowband-speech-for-low-bit-rate-wideband-coding-1602.08215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandwidth-extension-of-narrowband-speech-for-low-bit-rate-wideband-coding-1602.08215"/></url>
<url><loc>https://scifaro.com/en/abs/occupancy-estimation-in-smart-buildings-using-audio-processing-techniques-1602.08507</loc><lastmod>2016-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/occupancy-estimation-in-smart-buildings-using-audio-processing-techniques-1602.08507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/occupancy-estimation-in-smart-buildings-using-audio-processing-techniques-1602.08507"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-robust-frequency-domain-echo-canceller-with-closed-loop-learning-rate-adaptation-1602.08609</loc><lastmod>2016-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-robust-frequency-domain-echo-canceller-with-closed-loop-learning-rate-adaptation-1602.08609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-robust-frequency-domain-echo-canceller-with-closed-loop-learning-rate-adaptation-1602.08609"/></url>
<url><loc>https://scifaro.com/en/abs/perceptually-motivated-nonlinear-channel-decorrelation-for-stereo-acoustic-echo-cancellation-1602.08633</loc><lastmod>2016-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptually-motivated-nonlinear-channel-decorrelation-for-stereo-acoustic-echo-cancellation-1602.08633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptually-motivated-nonlinear-channel-decorrelation-for-stereo-acoustic-echo-cancellation-1602.08633"/></url>
<url><loc>https://scifaro.com/en/abs/speex-a-free-codec-for-free-speech-1602.08668</loc><lastmod>2016-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speex-a-free-codec-for-free-speech-1602.08668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speex-a-free-codec-for-free-speech-1602.08668"/></url>
<url><loc>https://scifaro.com/en/abs/audio-word2vec-unsupervised-learning-of-audio-segment-representations-using-sequence-to-sequence-autoencoder-1603.00982</loc><lastmod>2016-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-word2vec-unsupervised-learning-of-audio-segment-representations-using-sequence-to-sequence-autoencoder-1603.00982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-word2vec-unsupervised-learning-of-audio-segment-representations-using-sequence-to-sequence-autoencoder-1603.00982"/></url>
<url><loc>https://scifaro.com/en/abs/an-argument-based-creative-assistant-for-harmonic-blending-1603.01770</loc><lastmod>2016-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-argument-based-creative-assistant-for-harmonic-blending-1603.01770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-argument-based-creative-assistant-for-harmonic-blending-1603.01770"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-iterative-sinusoidal-parameter-estimation-1603.01824</loc><lastmod>2016-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-iterative-sinusoidal-parameter-estimation-1603.01824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-iterative-sinusoidal-parameter-estimation-1603.01824"/></url>
<url><loc>https://scifaro.com/en/abs/improved-noise-weighting-in-celp-coding-of-speech-applying-the-vorbis-psychoacoustic-model-to-speex-1603.01863</loc><lastmod>2016-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-noise-weighting-in-celp-coding-of-speech-applying-the-vorbis-psychoacoustic-model-to-speex-1603.01863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-noise-weighting-in-celp-coding-of-speech-applying-the-vorbis-psychoacoustic-model-to-speex-1603.01863"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-array-post-filter-for-separation-of-simultaneous-non-stationary-sources-1603.03215</loc><lastmod>2016-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-array-post-filter-for-separation-of-simultaneous-non-stationary-sources-1603.03215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-array-post-filter-for-separation-of-simultaneous-non-stationary-sources-1603.03215"/></url>
<url><loc>https://scifaro.com/en/abs/channel-decorrelation-for-stereo-acoustic-echo-cancellation-in-high-quality-audio-communication-1603.03364</loc><lastmod>2016-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-decorrelation-for-stereo-acoustic-echo-cancellation-in-high-quality-audio-communication-1603.03364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-decorrelation-for-stereo-acoustic-echo-cancellation-in-high-quality-audio-communication-1603.03364"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-detection-goes-noisy-an-analysis-of-synthetic-speech-detection-in-the-presence-of-additive-noise-1603.03947</loc><lastmod>2016-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-detection-goes-noisy-an-analysis-of-synthetic-speech-detection-in-the-presence-of-additive-noise-1603.03947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-detection-goes-noisy-an-analysis-of-synthetic-speech-detection-in-the-presence-of-additive-noise-1603.03947"/></url>
<url><loc>https://scifaro.com/en/abs/performance-analysis-of-source-image-estimators-in-blind-source-separation-1603.04179</loc><lastmod>2017-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-analysis-of-source-image-estimators-in-blind-source-separation-1603.04179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-analysis-of-source-image-estimators-in-blind-source-separation-1603.04179"/></url>
<url><loc>https://scifaro.com/en/abs/novel-speech-features-for-improved-detection-of-spoofing-attacks-1603.04264</loc><lastmod>2016-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-speech-features-for-improved-detection-of-spoofing-attacks-1603.04264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-speech-features-for-improved-detection-of-spoofing-attacks-1603.04264"/></url>
<url><loc>https://scifaro.com/en/abs/guitar-solos-as-networks-1603.04979</loc><lastmod>2016-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guitar-solos-as-networks-1603.04979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guitar-solos-as-networks-1603.04979"/></url>
<url><loc>https://scifaro.com/en/abs/modified-group-delay-based-multipitch-estimation-in-co-channel-speech-1603.05435</loc><lastmod>2016-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modified-group-delay-based-multipitch-estimation-in-co-channel-speech-1603.05435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modified-group-delay-based-multipitch-estimation-in-co-channel-speech-1603.05435"/></url>
<url><loc>https://scifaro.com/en/abs/a-pairwise-approach-to-simultaneous-onset-offset-detection-for-singing-voice-using-correntropy-1603.06065</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pairwise-approach-to-simultaneous-onset-offset-detection-for-singing-voice-using-correntropy-1603.06065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pairwise-approach-to-simultaneous-onset-offset-detection-for-singing-voice-using-correntropy-1603.06065"/></url>
<url><loc>https://scifaro.com/en/abs/deductive-refinement-of-species-labelling-in-weakly-labelled-birdsong-recordings-1603.07173</loc><lastmod>2016-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deductive-refinement-of-species-labelling-in-weakly-labelled-birdsong-recordings-1603.07173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deductive-refinement-of-species-labelling-in-weakly-labelled-birdsong-recordings-1603.07173"/></url>
<url><loc>https://scifaro.com/en/abs/individual-identity-in-songbirds-signal-representations-and-metric-learning-for-locating-the-information-in-complex-corvid-calls-1603.07236</loc><lastmod>2016-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/individual-identity-in-songbirds-signal-representations-and-metric-learning-for-locating-the-information-in-complex-corvid-calls-1603.07236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/individual-identity-in-songbirds-signal-representations-and-metric-learning-for-locating-the-information-in-complex-corvid-calls-1603.07236"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-impact-of-localization-errors-on-hrtf-based-robust-least-squares-beamforming-1603.08740</loc><lastmod>2016-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-impact-of-localization-errors-on-hrtf-based-robust-least-squares-beamforming-1603.08740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-impact-of-localization-errors-on-hrtf-based-robust-least-squares-beamforming-1603.08740"/></url>
<url><loc>https://scifaro.com/en/abs/mathematical-harmony-analysis-1603.08904</loc><lastmod>2017-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mathematical-harmony-analysis-1603.08904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mathematical-harmony-analysis-1603.08904"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-separation-and-vocal-f0-estimation-based-on-mutual-combination-of-robust-principal-component-analysis-and-subharmonic-summation-1604.00192</loc><lastmod>2016-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-separation-and-vocal-f0-estimation-based-on-mutual-combination-of-robust-principal-component-analysis-and-subharmonic-summation-1604.00192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-separation-and-vocal-f0-estimation-based-on-mutual-combination-of-robust-principal-component-analysis-and-subharmonic-summation-1604.00192"/></url>
<url><loc>https://scifaro.com/en/abs/recurrent-neural-networks-for-polyphonic-sound-event-detection-in-real-life-recordings-1604.00861</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recurrent-neural-networks-for-polyphonic-sound-event-detection-in-real-life-recordings-1604.00861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recurrent-neural-networks-for-polyphonic-sound-event-detection-in-real-life-recordings-1604.00861"/></url>
<url><loc>https://scifaro.com/en/abs/ragas-in-bollywood-music-a-microscopic-view-through-multrifractal-cross-correlation-method-1604.02243</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ragas-in-bollywood-music-a-microscopic-view-through-multrifractal-cross-correlation-method-1604.02243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ragas-in-bollywood-music-a-microscopic-view-through-multrifractal-cross-correlation-method-1604.02243"/></url>
<url><loc>https://scifaro.com/en/abs/variation-of-singing-styles-within-a-particular-gharana-of-hindustani-classical-music-a-nonlinear-multifractal-study-1604.02250</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variation-of-singing-styles-within-a-particular-gharana-of-hindustani-classical-music-a-nonlinear-multifractal-study-1604.02250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variation-of-singing-styles-within-a-particular-gharana-of-hindustani-classical-music-a-nonlinear-multifractal-study-1604.02250"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-speech-recognition-using-multi-channel-based-channel-selection-and-channelweighting-1604.03276</loc><lastmod>2016-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-speech-recognition-using-multi-channel-based-channel-selection-and-channelweighting-1604.03276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-speech-recognition-using-multi-channel-based-channel-selection-and-channelweighting-1604.03276"/></url>
<url><loc>https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-speech-recognition-in-adverse-real-world-environments-1604.03393</loc><lastmod>2017-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-speech-recognition-in-adverse-real-world-environments-1604.03393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-coherence-based-spectral-enhancement-for-speech-recognition-in-adverse-real-world-environments-1604.03393"/></url>
<url><loc>https://scifaro.com/en/abs/composition-of-deep-and-spiking-neural-networks-for-very-low-bit-rate-speech-coding-1604.04383</loc><lastmod>2016-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composition-of-deep-and-spiking-neural-networks-for-very-low-bit-rate-speech-coding-1604.04383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composition-of-deep-and-spiking-neural-networks-for-very-low-bit-rate-speech-coding-1604.04383"/></url>
<url><loc>https://scifaro.com/en/abs/two-pairwise-iterative-schemes-for-high-dimensional-blind-source-separation-1604.04669</loc><lastmod>2016-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-pairwise-iterative-schemes-for-high-dimensional-blind-source-separation-1604.04669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-pairwise-iterative-schemes-for-high-dimensional-blind-source-separation-1604.04669"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-acoustic-event-detection-1604.07160</loc><lastmod>2016-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-acoustic-event-detection-1604.07160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-acoustic-event-detection-1604.07160"/></url>
<url><loc>https://scifaro.com/en/abs/accent-classification-with-phonetic-vowel-representation-1604.08095</loc><lastmod>2016-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-classification-with-phonetic-vowel-representation-1604.08095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-classification-with-phonetic-vowel-representation-1604.08095"/></url>
<url><loc>https://scifaro.com/en/abs/robust-joint-alignment-of-multiple-versions-of-a-piece-of-music-1604.08516</loc><lastmod>2016-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-joint-alignment-of-multiple-versions-of-a-piece-of-music-1604.08516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-joint-alignment-of-multiple-versions-of-a-piece-of-music-1604.08516"/></url>
<url><loc>https://scifaro.com/en/abs/learning-compact-structural-representations-for-audio-events-using-regressor-banks-1604.08716</loc><lastmod>2016-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-compact-structural-representations-for-audio-events-using-regressor-banks-1604.08716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-compact-structural-representations-for-audio-events-using-regressor-banks-1604.08716"/></url>
<url><loc>https://scifaro.com/en/abs/music-transcription-modelling-and-composition-using-deep-learning-1604.08723</loc><lastmod>2016-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-transcription-modelling-and-composition-using-deep-learning-1604.08723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-transcription-modelling-and-composition-using-deep-learning-1604.08723"/></url>
<url><loc>https://scifaro.com/en/abs/joint-sound-source-separation-and-speaker-recognition-1604.08852</loc><lastmod>2016-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-sound-source-separation-and-speaker-recognition-1604.08852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-sound-source-separation-and-speaker-recognition-1604.08852"/></url>
<url><loc>https://scifaro.com/en/abs/diagonal-unloading-beamforming-for-source-localization-1605.00810</loc><lastmod>2018-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diagonal-unloading-beamforming-for-source-localization-1605.00810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diagonal-unloading-beamforming-for-source-localization-1605.00810"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-using-outlier-detection-1605.01329</loc><lastmod>2016-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-outlier-detection-1605.01329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-outlier-detection-1605.01329"/></url>
<url><loc>https://scifaro.com/en/abs/dctnet-and-pcanet-for-acoustic-signal-feature-extraction-1605.01755</loc><lastmod>2016-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dctnet-and-pcanet-for-acoustic-signal-feature-extraction-1605.01755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dctnet-and-pcanet-for-acoustic-signal-feature-extraction-1605.01755"/></url>
<url><loc>https://scifaro.com/en/abs/audio-event-detection-using-weakly-labeled-data-1605.02401</loc><lastmod>2016-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-event-detection-using-weakly-labeled-data-1605.02401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-event-detection-using-weakly-labeled-data-1605.02401"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-in-multiple-noise-conditions-using-deep-neural-networks-1605.02427</loc><lastmod>2016-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-in-multiple-noise-conditions-using-deep-neural-networks-1605.02427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-in-multiple-noise-conditions-using-deep-neural-networks-1605.02427"/></url>
<url><loc>https://scifaro.com/en/abs/sub-vector-extraction-and-cascade-post-processing-for-speaker-verification-using-mllr-super-vectors-1605.03724</loc><lastmod>2016-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-vector-extraction-and-cascade-post-processing-for-speaker-verification-using-mllr-super-vectors-1605.03724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-vector-extraction-and-cascade-post-processing-for-speaker-verification-using-mllr-super-vectors-1605.03724"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-networks-on-the-pitch-spiral-for-musical-instrument-recognition-1605.06644</loc><lastmod>2017-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-networks-on-the-pitch-spiral-for-musical-instrument-recognition-1605.06644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-networks-on-the-pitch-spiral-for-musical-instrument-recognition-1605.06644"/></url>
<url><loc>https://scifaro.com/en/abs/madmom-a-new-python-audio-and-music-signal-processing-library-1605.07008</loc><lastmod>2016-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/madmom-a-new-python-audio-and-music-signal-processing-library-1605.07008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/madmom-a-new-python-audio-and-music-signal-processing-library-1605.07008"/></url>
<url><loc>https://scifaro.com/en/abs/complex-nmf-under-phase-constraints-based-on-signal-modeling-application-to-audio-source-separation-1605.07466</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-nmf-under-phase-constraints-based-on-signal-modeling-application-to-audio-source-separation-1605.07466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-nmf-under-phase-constraints-based-on-signal-modeling-application-to-audio-source-separation-1605.07466"/></url>
<url><loc>https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-with-linear-unwrapping-application-to-audio-signal-restoration-1605.07467</loc><lastmod>2016-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-with-linear-unwrapping-application-to-audio-signal-restoration-1605.07467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-with-linear-unwrapping-application-to-audio-signal-restoration-1605.07467"/></url>
<url><loc>https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-based-on-a-model-of-repeated-audio-events-1605.07468</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-based-on-a-model-of-repeated-audio-events-1605.07468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-reconstruction-of-spectrograms-based-on-a-model-of-repeated-audio-events-1605.07468"/></url>
<url><loc>https://scifaro.com/en/abs/phase-recovery-in-nmf-for-audio-source-separation-an-insightful-benchmark-1605.07469</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-recovery-in-nmf-for-audio-source-separation-an-insightful-benchmark-1605.07469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-recovery-in-nmf-for-audio-source-separation-an-insightful-benchmark-1605.07469"/></url>
<url><loc>https://scifaro.com/en/abs/using-instantaneous-frequency-and-aperiodicity-detection-to-estimate-f0-for-high-quality-speech-synthesis-1605.07809</loc><lastmod>2018-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-instantaneous-frequency-and-aperiodicity-detection-to-estimate-f0-for-high-quality-speech-synthesis-1605.07809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-instantaneous-frequency-and-aperiodicity-detection-to-estimate-f0-for-high-quality-speech-synthesis-1605.07809"/></url>
<url><loc>https://scifaro.com/en/abs/robust-downbeat-tracking-using-an-ensemble-of-convolutional-networks-1605.08396</loc><lastmod>2016-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-downbeat-tracking-using-an-ensemble-of-convolutional-networks-1605.08396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-downbeat-tracking-using-an-ensemble-of-convolutional-networks-1605.08396"/></url>
<url><loc>https://scifaro.com/en/abs/the-implementation-of-low-cost-urban-acoustic-monitoring-devices-1605.08450</loc><lastmod>2016-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-implementation-of-low-cost-urban-acoustic-monitoring-devices-1605.08450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-implementation-of-low-cost-urban-acoustic-monitoring-devices-1605.08450"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-networks-for-predominant-instrument-recognition-in-polyphonic-music-1605.09507</loc><lastmod>2016-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-for-predominant-instrument-recognition-in-polyphonic-music-1605.09507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-for-predominant-instrument-recognition-in-polyphonic-music-1605.09507"/></url>
<url><loc>https://scifaro.com/en/abs/nonnegative-tensor-factorization-with-frequency-modulation-cues-for-blind-audio-source-separation-1606.00037</loc><lastmod>2016-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonnegative-tensor-factorization-with-frequency-modulation-cues-for-blind-audio-source-separation-1606.00037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonnegative-tensor-factorization-with-frequency-modulation-cues-for-blind-audio-source-separation-1606.00037"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-tagging-using-deep-convolutional-neural-networks-1606.00298</loc><lastmod>2016-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-tagging-using-deep-convolutional-neural-networks-1606.00298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-tagging-using-deep-convolutional-neural-networks-1606.00298"/></url>
<url><loc>https://scifaro.com/en/abs/piano-transcription-in-the-studio-using-an-extensible-alternating-directions-framework-1606.00785</loc><lastmod>2016-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/piano-transcription-in-the-studio-using-an-extensible-alternating-directions-framework-1606.00785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/piano-transcription-in-the-studio-using-an-extensible-alternating-directions-framework-1606.00785"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-symbolic-music-beyond-the-piano-roll-1606.01368</loc><lastmod>2016-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-symbolic-music-beyond-the-piano-roll-1606.01368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-symbolic-music-beyond-the-piano-roll-1606.01368"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-data-version-1-0-1606.02542</loc><lastmod>2016-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-data-version-1-0-1606.02542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-data-version-1-0-1606.02542"/></url>
<url><loc>https://scifaro.com/en/abs/audio-content-based-geotagging-in-multimedia-1606.02816</loc><lastmod>2016-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-content-based-geotagging-in-multimedia-1606.02816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-content-based-geotagging-in-multimedia-1606.02816"/></url>
<url><loc>https://scifaro.com/en/abs/the-horse-inside-seeking-causes-behind-the-behaviours-of-music-content-analysis-systems-1606.03044</loc><lastmod>2016-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-horse-inside-seeking-causes-behind-the-behaviours-of-music-content-analysis-systems-1606.03044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-horse-inside-seeking-causes-behind-the-behaviours-of-music-content-analysis-systems-1606.03044"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-characterization-of-environments-ace-challenge-results-technical-report-1606.03365</loc><lastmod>2017-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-characterization-of-environments-ace-challenge-results-technical-report-1606.03365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-characterization-of-environments-ace-challenge-results-technical-report-1606.03365"/></url>
<url><loc>https://scifaro.com/en/abs/bigear-inferring-the-ambient-and-emotional-correlates-from-smartphone-based-acoustic-big-data-1606.03636</loc><lastmod>2016-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigear-inferring-the-ambient-and-emotional-correlates-from-smartphone-based-acoustic-big-data-1606.03636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigear-inferring-the-ambient-and-emotional-correlates-from-smartphone-based-acoustic-big-data-1606.03636"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-scalable-audio-content-analysis-1606.03664</loc><lastmod>2016-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-scalable-audio-content-analysis-1606.03664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-scalable-audio-content-analysis-1606.03664"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-bottleneck-representation-from-sequence-auto-encoder-1606.05844</loc><lastmod>2016-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-bottleneck-representation-from-sequence-auto-encoder-1606.05844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-bottleneck-representation-from-sequence-auto-encoder-1606.05844"/></url>
<url><loc>https://scifaro.com/en/abs/fast-compact-and-high-quality-lstm-rnn-based-statistical-parametric-speech-synthesizers-for-mobile-devices-1606.06061</loc><lastmod>2016-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-compact-and-high-quality-lstm-rnn-based-statistical-parametric-speech-synthesizers-for-mobile-devices-1606.06061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-compact-and-high-quality-lstm-rnn-based-statistical-parametric-speech-synthesizers-for-mobile-devices-1606.06061"/></url>
<url><loc>https://scifaro.com/en/abs/polymetric-rhythmic-feel-for-a-cognitive-drum-computer-1606.06197</loc><lastmod>2016-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polymetric-rhythmic-feel-for-a-cognitive-drum-computer-1606.06197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polymetric-rhythmic-feel-for-a-cognitive-drum-computer-1606.06197"/></url>
<url><loc>https://scifaro.com/en/abs/uncalibrated-3d-room-reconstruction-from-sound-1606.06258</loc><lastmod>2016-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncalibrated-3d-room-reconstruction-from-sound-1606.06258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncalibrated-3d-room-reconstruction-from-sound-1606.06258"/></url>
<url><loc>https://scifaro.com/en/abs/a-speaker-diarization-system-for-studying-peer-led-team-learning-groups-1606.07136</loc><lastmod>2016-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speaker-diarization-system-for-studying-peer-led-team-learning-groups-1606.07136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speaker-diarization-system-for-studying-peer-led-team-learning-groups-1606.07136"/></url>
<url><loc>https://scifaro.com/en/abs/an-active-machine-hearing-system-for-auditory-stream-segregation-1606.07598</loc><lastmod>2016-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-active-machine-hearing-system-for-auditory-stream-segregation-1606.07598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-active-machine-hearing-system-for-auditory-stream-segregation-1606.07598"/></url>
<url><loc>https://scifaro.com/en/abs/penambahan-emosi-menggunakan-metode-manipulasi-prosodi-untuk-sistem-text-to-speech-bahasa-indonesia-1606.09222</loc><lastmod>2016-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/penambahan-emosi-menggunakan-metode-manipulasi-prosodi-untuk-sistem-text-to-speech-bahasa-indonesia-1606.09222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/penambahan-emosi-menggunakan-metode-manipulasi-prosodi-untuk-sistem-text-to-speech-bahasa-indonesia-1606.09222"/></url>
<url><loc>https://scifaro.com/en/abs/spherical-harmonic-signal-covariance-and-sound-field-diffuseness-1607.00211</loc><lastmod>2016-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spherical-harmonic-signal-covariance-and-sound-field-diffuseness-1607.00211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spherical-harmonic-signal-covariance-and-sound-field-diffuseness-1607.00211"/></url>
<url><loc>https://scifaro.com/en/abs/car-forest-joint-classification-regression-decision-forests-for-overlapping-audio-event-detection-1607.02306</loc><lastmod>2016-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/car-forest-joint-classification-regression-decision-forests-for-overlapping-audio-event-detection-1607.02306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/car-forest-joint-classification-regression-decision-forests-for-overlapping-audio-event-detection-1607.02306"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-convolutional-neural-network-and-multiple-width-frequency-delta-data-augmentation-1607.02383</loc><lastmod>2016-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-convolutional-neural-network-and-multiple-width-frequency-delta-data-augmentation-1607.02383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-convolutional-neural-network-and-multiple-width-frequency-delta-data-augmentation-1607.02383"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-feature-learning-based-on-deep-models-for-environmental-audio-tagging-1607.03681</loc><lastmod>2017-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-feature-learning-based-on-deep-models-for-environmental-audio-tagging-1607.03681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-feature-learning-based-on-deep-models-for-environmental-audio-tagging-1607.03681"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-learning-for-dnn-based-acoustic-scene-classification-1607.03682</loc><lastmod>2016-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-learning-for-dnn-based-acoustic-scene-classification-1607.03682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-learning-for-dnn-based-acoustic-scene-classification-1607.03682"/></url>
<url><loc>https://scifaro.com/en/abs/audiopairbank-towards-a-large-scale-tag-pair-based-audio-content-analysis-1607.03766</loc><lastmod>2018-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiopairbank-towards-a-large-scale-tag-pair-based-audio-content-analysis-1607.03766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiopairbank-towards-a-large-scale-tag-pair-based-audio-content-analysis-1607.03766"/></url>
<url><loc>https://scifaro.com/en/abs/dcar-a-discriminative-and-compact-audio-representation-to-improve-event-detection-1607.04378</loc><lastmod>2016-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcar-a-discriminative-and-compact-audio-representation-to-improve-event-detection-1607.04378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcar-a-discriminative-and-compact-audio-representation-to-improve-event-detection-1607.04378"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-environmental-sound-recognition-performance-versus-computational-cost-1607.04589</loc><lastmod>2016-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-environmental-sound-recognition-performance-versus-computational-cost-1607.04589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-environmental-sound-recognition-performance-versus-computational-cost-1607.04589"/></url>
<url><loc>https://scifaro.com/en/abs/features-and-kernels-for-audio-event-recognition-1607.05765</loc><lastmod>2016-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/features-and-kernels-for-audio-event-recognition-1607.05765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/features-and-kernels-for-audio-event-recognition-1607.05765"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-based-robust-least-squares-frequency-invariant-polynomial-beamforming-1607.06642</loc><lastmod>2016-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-based-robust-least-squares-frequency-invariant-polynomial-beamforming-1607.06642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-based-robust-least-squares-frequency-invariant-polynomial-beamforming-1607.06642"/></url>
<url><loc>https://scifaro.com/en/abs/inpainting-of-long-audio-segments-with-similarity-graphs-1607.06667</loc><lastmod>2018-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inpainting-of-long-audio-segments-with-similarity-graphs-1607.06667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inpainting-of-long-audio-segments-with-similarity-graphs-1607.06667"/></url>
<url><loc>https://scifaro.com/en/abs/experiments-on-the-dcase-challenge-2016-acoustic-scene-classification-and-sound-event-detection-in-real-life-recording-1607.06706</loc><lastmod>2016-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experiments-on-the-dcase-challenge-2016-acoustic-scene-classification-and-sound-event-detection-in-real-life-recording-1607.06706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experiments-on-the-dcase-challenge-2016-acoustic-scene-classification-and-sound-event-detection-in-real-life-recording-1607.06706"/></url>
<url><loc>https://scifaro.com/en/abs/abroa-audio-based-room-occupancy-analysis-using-gaussian-mixtures-and-hidden-markov-models-1607.07801</loc><lastmod>2016-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/abroa-audio-based-room-occupancy-analysis-using-gaussian-mixtures-and-hidden-markov-models-1607.07801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/abroa-audio-based-room-occupancy-analysis-using-gaussian-mixtures-and-hidden-markov-models-1607.07801"/></url>
<url><loc>https://scifaro.com/en/abs/early-and-late-time-acoustic-measures-for-underwater-seismic-airgun-signals-in-long-term-acoustic-data-sets-1607.08482</loc><lastmod>2016-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/early-and-late-time-acoustic-measures-for-underwater-seismic-airgun-signals-in-long-term-acoustic-data-sets-1607.08482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/early-and-late-time-acoustic-measures-for-underwater-seismic-airgun-signals-in-long-term-acoustic-data-sets-1607.08482"/></url>
<url><loc>https://scifaro.com/en/abs/l-evy-nmf-for-robust-nonnegative-source-separation-1608.01844</loc><lastmod>2016-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l-evy-nmf-for-robust-nonnegative-source-separation-1608.01844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l-evy-nmf-for-robust-nonnegative-source-separation-1608.01844"/></url>
<url><loc>https://scifaro.com/en/abs/model-based-stft-phase-recovery-for-audio-source-separation-1608.01953</loc><lastmod>2018-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-based-stft-phase-recovery-for-audio-source-separation-1608.01953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-based-stft-phase-recovery-for-audio-source-separation-1608.01953"/></url>
<url><loc>https://scifaro.com/en/abs/incorporation-of-speech-duration-information-in-score-fusion-of-speaker-recognition-systems-1608.02272</loc><lastmod>2016-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporation-of-speech-duration-information-in-score-fusion-of-speaker-recognition-systems-1608.02272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporation-of-speech-duration-information-in-score-fusion-of-speaker-recognition-systems-1608.02272"/></url>
<url><loc>https://scifaro.com/en/abs/bird-detection-in-audio-a-survey-and-a-challenge-1608.03417</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bird-detection-in-audio-a-survey-and-a-challenge-1608.03417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bird-detection-in-audio-a-survey-and-a-challenge-1608.03417"/></url>
<url><loc>https://scifaro.com/en/abs/speech-signal-analysis-for-the-estimation-of-heart-rates-under-different-emotional-states-1608.03720</loc><lastmod>2016-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-signal-analysis-for-the-estimation-of-heart-rates-under-different-emotional-states-1608.03720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-signal-analysis-for-the-estimation-of-heart-rates-under-different-emotional-states-1608.03720"/></url>
<url><loc>https://scifaro.com/en/abs/design-of-variable-bandpass-filters-using-first-order-allpass-transformation-and-coefficient-decimation-1608.04069</loc><lastmod>2016-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-of-variable-bandpass-filters-using-first-order-allpass-transformation-and-coefficient-decimation-1608.04069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-of-variable-bandpass-filters-using-first-order-allpass-transformation-and-coefficient-decimation-1608.04069"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-environmental-sound-classification-1608.04363</loc><lastmod>2017-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-environmental-sound-classification-1608.04363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-networks-and-data-augmentation-for-environmental-sound-classification-1608.04363"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-efficiency-of-damas-for-sound-source-localization-via-wavelet-compression-computational-grid-1608.05179</loc><lastmod>2017-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-efficiency-of-damas-for-sound-source-localization-via-wavelet-compression-computational-grid-1608.05179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-efficiency-of-damas-for-sound-source-localization-via-wavelet-compression-computational-grid-1608.05179"/></url>
<url><loc>https://scifaro.com/en/abs/diffuse-field-coherence-of-sensors-with-arbitrary-directional-responses-1608.07713</loc><lastmod>2016-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffuse-field-coherence-of-sensors-with-arbitrary-directional-responses-1608.07713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffuse-field-coherence-of-sensors-with-arbitrary-directional-responses-1608.07713"/></url>
<url><loc>https://scifaro.com/en/abs/a-non-iterative-method-for-re-construction-of-phase-from-stft-magnitude-1609.00291</loc><lastmod>2019-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-non-iterative-method-for-re-construction-of-phase-from-stft-magnitude-1609.00291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-non-iterative-method-for-re-construction-of-phase-from-stft-magnitude-1609.00291"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-enhancement-for-single-channel-audio-source-separation-using-deep-neural-networks-1609.01678</loc><lastmod>2016-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-enhancement-for-single-channel-audio-source-separation-using-deep-neural-networks-1609.01678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-enhancement-for-single-channel-audio-source-separation-using-deep-neural-networks-1609.01678"/></url>
<url><loc>https://scifaro.com/en/abs/relaxed-binaural-lcmv-beamforming-1609.03213</loc><lastmod>2019-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relaxed-binaural-lcmv-beamforming-1609.03213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relaxed-binaural-lcmv-beamforming-1609.03213"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-network-alternative-to-non-negative-audio-models-1609.03296</loc><lastmod>2016-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-network-alternative-to-non-negative-audio-models-1609.03296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-network-alternative-to-non-negative-audio-models-1609.03296"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-intensity-energy-density-and-diffuseness-estimation-in-a-directionally-constrained-region-1609.03409</loc><lastmod>2016-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-intensity-energy-density-and-diffuseness-estimation-in-a-directionally-constrained-region-1609.03409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-intensity-energy-density-and-diffuseness-estimation-in-a-directionally-constrained-region-1609.03409"/></url>
<url><loc>https://scifaro.com/en/abs/wavenet-a-generative-model-for-raw-audio-1609.03499</loc><lastmod>2016-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavenet-a-generative-model-for-raw-audio-1609.03499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavenet-a-generative-model-for-raw-audio-1609.03499"/></url>
<url><loc>https://scifaro.com/en/abs/tristounet-triplet-loss-for-speaker-turn-embedding-1609.04301</loc><lastmod>2017-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tristounet-triplet-loss-for-speaker-turn-embedding-1609.04301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tristounet-triplet-loss-for-speaker-turn-embedding-1609.04301"/></url>
<url><loc>https://scifaro.com/en/abs/intrinsic-normalization-and-extrinsic-denormalization-of-formant-data-of-vowels-1609.05104</loc><lastmod>2016-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intrinsic-normalization-and-extrinsic-denormalization-of-formant-data-of-vowels-1609.05104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intrinsic-normalization-and-extrinsic-denormalization-of-formant-data-of-vowels-1609.05104"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-for-self-training-audio-event-detectors-using-web-data-1609.06026</loc><lastmod>2017-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-for-self-training-audio-event-detectors-using-web-data-1609.06026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-for-self-training-audio-event-detectors-using-web-data-1609.06026"/></url>
<url><loc>https://scifaro.com/en/abs/interference-reduction-in-music-recordings-combining-kernel-additive-modelling-and-non-negative-matrix-factorization-1609.06210</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interference-reduction-in-music-recordings-combining-kernel-additive-modelling-and-non-negative-matrix-factorization-1609.06210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interference-reduction-in-music-recordings-combining-kernel-additive-modelling-and-non-negative-matrix-factorization-1609.06210"/></url>
<url><loc>https://scifaro.com/en/abs/ku-ispl-language-recognition-system-for-nist-2015-i-vector-machine-learning-challenge-1609.06404</loc><lastmod>2016-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ku-ispl-language-recognition-system-for-nist-2015-i-vector-machine-learning-challenge-1609.06404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ku-ispl-language-recognition-system-for-nist-2015-i-vector-machine-learning-challenge-1609.06404"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-statistic-feature-of-the-short-time-amplitude-spectrum-values-for-human-s-unvoiced-pronunciation-1609.07245</loc><lastmod>2016-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-statistic-feature-of-the-short-time-amplitude-spectrum-values-for-human-s-unvoiced-pronunciation-1609.07245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-statistic-feature-of-the-short-time-amplitude-spectrum-values-for-human-s-unvoiced-pronunciation-1609.07245"/></url>
<url><loc>https://scifaro.com/en/abs/discovering-sound-concepts-and-acoustic-relations-in-text-1609.07384</loc><lastmod>2017-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discovering-sound-concepts-and-acoustic-relations-in-text-1609.07384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discovering-sound-concepts-and-acoustic-relations-in-text-1609.07384"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-for-children-s-speech-1609.07498</loc><lastmod>2016-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-for-children-s-speech-1609.07498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-for-children-s-speech-1609.07498"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-diarization-system-for-measuring-dominance-in-peer-led-team-learning-groups-1609.08211</loc><lastmod>2016-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-diarization-system-for-measuring-dominance-in-peer-led-team-learning-groups-1609.08211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-diarization-system-for-measuring-dominance-in-peer-led-team-learning-groups-1609.08211"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-detection-of-bird-vocalisations-1609.08408</loc><lastmod>2016-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-detection-of-bird-vocalisations-1609.08408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-detection-of-bird-vocalisations-1609.08408"/></url>
<url><loc>https://scifaro.com/en/abs/decision-making-based-on-cohort-scores-for-speaker-verification-1609.08419</loc><lastmod>2016-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decision-making-based-on-cohort-scores-for-speaker-verification-1609.08419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decision-making-based-on-cohort-scores-for-speaker-verification-1609.08419"/></url>
<url><loc>https://scifaro.com/en/abs/local-training-for-plda-in-speaker-verification-1609.08433</loc><lastmod>2016-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-training-for-plda-in-speaker-verification-1609.08433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-training-for-plda-in-speaker-verification-1609.08433"/></url>
<url><loc>https://scifaro.com/en/abs/collaborative-learning-for-language-and-speaker-recognition-1609.08442</loc><lastmod>2017-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collaborative-learning-for-language-and-speaker-recognition-1609.08442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collaborative-learning-for-language-and-speaker-recognition-1609.08442"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-and-sparsity-analysis-applied-to-speech-enhancement-via-online-estimated-dictionary-1609.09231</loc><lastmod>2016-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-and-sparsity-analysis-applied-to-speech-enhancement-via-online-estimated-dictionary-1609.09231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-and-sparsity-analysis-applied-to-speech-enhancement-via-online-estimated-dictionary-1609.09231"/></url>
<url><loc>https://scifaro.com/en/abs/measurement-of-sound-fields-using-moving-microphones-1609.09390</loc><lastmod>2016-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measurement-of-sound-fields-using-moving-microphones-1609.09390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measurement-of-sound-fields-using-moving-microphones-1609.09390"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-architectures-for-large-scale-audio-classification-1609.09430</loc><lastmod>2017-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-architectures-for-large-scale-audio-classification-1609.09430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-architectures-for-large-scale-audio-classification-1609.09430"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-speech-enhancement-in-envelop-and-details-subspaces-1609.09443</loc><lastmod>2017-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-speech-enhancement-in-envelop-and-details-subspaces-1609.09443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-speech-enhancement-in-envelop-and-details-subspaces-1609.09443"/></url>
<url><loc>https://scifaro.com/en/abs/rectified-binaural-ratio-a-complex-t-distributed-feature-for-robust-sound-localization-1609.09743</loc><lastmod>2016-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rectified-binaural-ratio-a-complex-t-distributed-feature-for-robust-sound-localization-1609.09743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rectified-binaural-ratio-a-complex-t-distributed-feature-for-robust-sound-localization-1609.09743"/></url>
<url><loc>https://scifaro.com/en/abs/phase-unmixing-multichannel-source-separation-with-magnitude-constraints-1609.09744</loc><lastmod>2017-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-unmixing-multichannel-source-separation-with-magnitude-constraints-1609.09744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-unmixing-multichannel-source-separation-with-magnitude-constraints-1609.09744"/></url>
<url><loc>https://scifaro.com/en/abs/hearing-in-a-shoe-box-binaural-source-position-and-wall-absorption-estimation-using-virtually-supervised-learning-1609.09747</loc><lastmod>2017-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearing-in-a-shoe-box-binaural-source-position-and-wall-absorption-estimation-using-virtually-supervised-learning-1609.09747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearing-in-a-shoe-box-binaural-source-position-and-wall-absorption-estimation-using-virtually-supervised-learning-1609.09747"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-dictionary-based-approach-for-background-noise-and-speaker-classification-and-subsequent-source-separation-1609.09764</loc><lastmod>2016-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-dictionary-based-approach-for-background-noise-and-speaker-classification-and-subsequent-source-separation-1609.09764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-dictionary-based-approach-for-background-noise-and-speaker-classification-and-subsequent-source-separation-1609.09764"/></url>
<url><loc>https://scifaro.com/en/abs/very-deep-convolutional-neural-networks-for-raw-waveforms-1610.00087</loc><lastmod>2016-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/very-deep-convolutional-neural-networks-for-raw-waveforms-1610.00087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/very-deep-convolutional-neural-networks-for-raw-waveforms-1610.00087"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-modeling-of-musical-solos-as-complex-networks-1610.00468</loc><lastmod>2016-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-modeling-of-musical-solos-as-complex-networks-1610.00468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-modeling-of-musical-solos-as-complex-networks-1610.00468"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-via-two-stage-dual-tree-complex-wavelet-packet-transform-with-a-speech-presence-probability-estimator-1610.00644</loc><lastmod>2017-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-via-two-stage-dual-tree-complex-wavelet-packet-transform-with-a-speech-presence-probability-estimator-1610.00644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-via-two-stage-dual-tree-complex-wavelet-packet-transform-with-a-speech-presence-probability-estimator-1610.00644"/></url>
<url><loc>https://scifaro.com/en/abs/divide-and-conquer-based-ensemble-to-spot-emotions-in-speech-using-mfcc-and-random-forest-1610.01382</loc><lastmod>2016-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/divide-and-conquer-based-ensemble-to-spot-emotions-in-speech-using-mfcc-and-random-forest-1610.01382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/divide-and-conquer-based-ensemble-to-spot-emotions-in-speech-using-mfcc-and-random-forest-1610.01382"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-detection-classification-model-for-audio-tagging-of-weakly-labelled-data-1610.01797</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-detection-classification-model-for-audio-tagging-of-weakly-labelled-data-1610.01797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-detection-classification-model-for-audio-tagging-of-weakly-labelled-data-1610.01797"/></url>
<url><loc>https://scifaro.com/en/abs/an-automatic-system-for-acoustic-microphone-geometry-calibration-based-on-minimal-solvers-1610.02392</loc><lastmod>2016-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automatic-system-for-acoustic-microphone-geometry-calibration-based-on-minimal-solvers-1610.02392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automatic-system-for-acoustic-microphone-geometry-calibration-based-on-minimal-solvers-1610.02392"/></url>
<url><loc>https://scifaro.com/en/abs/a-music-generating-system-inspired-by-the-science-of-complex-adaptive-systems-1610.02475</loc><lastmod>2019-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-music-generating-system-inspired-by-the-science-of-complex-adaptive-systems-1610.02475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-music-generating-system-inspired-by-the-science-of-complex-adaptive-systems-1610.02475"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-based-speaker-recognition-on-short-utterances-1610.02831</loc><lastmod>2016-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-based-speaker-recognition-on-short-utterances-1610.02831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-based-speaker-recognition-on-short-utterances-1610.02831"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-synthetic-speech-detection-using-frame-and-segment-specific-importance-weighting-1610.03009</loc><lastmod>2016-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-synthetic-speech-detection-using-frame-and-segment-specific-importance-weighting-1610.03009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-synthetic-speech-detection-using-frame-and-segment-specific-importance-weighting-1610.03009"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-speaker-recognition-on-short-utterances-1610.03190</loc><lastmod>2016-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-speaker-recognition-on-short-utterances-1610.03190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-speaker-recognition-on-short-utterances-1610.03190"/></url>
<url><loc>https://scifaro.com/en/abs/raven-x-high-performance-data-mining-toolbox-for-bioacoustic-data-analysis-1610.03772</loc><lastmod>2016-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raven-x-high-performance-data-mining-toolbox-for-bioacoustic-data-analysis-1610.03772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raven-x-high-performance-data-mining-toolbox-for-bioacoustic-data-analysis-1610.03772"/></url>
<url><loc>https://scifaro.com/en/abs/tonal-consonance-parameters-link-microscopic-and-macroscopic-properties-of-music-exposing-a-hidden-order-in-melody-1610.04551</loc><lastmod>2017-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tonal-consonance-parameters-link-microscopic-and-macroscopic-properties-of-music-exposing-a-hidden-order-in-melody-1610.04551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tonal-consonance-parameters-link-microscopic-and-macroscopic-properties-of-music-exposing-a-hidden-order-in-melody-1610.04551"/></url>
<url><loc>https://scifaro.com/en/abs/non-negative-matrix-factorization-based-subband-decomposition-for-acoustic-source-localization-1610.04695</loc><lastmod>2016-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-based-subband-decomposition-for-acoustic-source-localization-1610.04695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-based-subband-decomposition-for-acoustic-source-localization-1610.04695"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-source-localization-on-multiple-manifolds-with-distributed-microphones-1610.04770</loc><lastmod>2016-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-source-localization-on-multiple-manifolds-with-distributed-microphones-1610.04770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-source-localization-on-multiple-manifolds-with-distributed-microphones-1610.04770"/></url>
<url><loc>https://scifaro.com/en/abs/making-mainstream-synthesizers-with-csound-1610.04922</loc><lastmod>2016-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/making-mainstream-synthesizers-with-csound-1610.04922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/making-mainstream-synthesizers-with-csound-1610.04922"/></url>
<url><loc>https://scifaro.com/en/abs/improving-short-utterance-plda-speaker-verification-using-suv-modelling-and-utterance-partitioning-approach-1610.04965</loc><lastmod>2016-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-short-utterance-plda-speaker-verification-using-suv-modelling-and-utterance-partitioning-approach-1610.04965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-short-utterance-plda-speaker-verification-using-suv-modelling-and-utterance-partitioning-approach-1610.04965"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-reflector-localization-novel-image-source-reversion-and-direct-localization-methods-1610.05653</loc><lastmod>2017-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-reflector-localization-novel-image-source-reversion-and-direct-localization-methods-1610.05653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-reflector-localization-novel-image-source-reversion-and-direct-localization-methods-1610.05653"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-task-learning-model-for-malware-classification-with-useful-file-access-pattern-from-api-call-sequence-1610.05945</loc><lastmod>2016-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-task-learning-model-for-malware-classification-with-useful-file-access-pattern-from-api-call-sequence-1610.05945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-task-learning-model-for-malware-classification-with-useful-file-access-pattern-from-api-call-sequence-1610.05945"/></url>
<url><loc>https://scifaro.com/en/abs/a-bayesian-approach-to-estimation-of-speaker-normalization-parameters-1610.05948</loc><lastmod>2016-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bayesian-approach-to-estimation-of-speaker-normalization-parameters-1610.05948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bayesian-approach-to-estimation-of-speaker-normalization-parameters-1610.05948"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-of-infant-speech-perception-and-learning-1610.06214</loc><lastmod>2016-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-of-infant-speech-perception-and-learning-1610.06214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-of-infant-speech-perception-and-learning-1610.06214"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-factored-three-way-restricted-boltzmann-machines-for-speech-detection-1611.00326</loc><lastmod>2017-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-factored-three-way-restricted-boltzmann-machines-for-speech-detection-1611.00326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-factored-three-way-restricted-boltzmann-machines-for-speech-detection-1611.00326"/></url>
<url><loc>https://scifaro.com/en/abs/the-intelligent-voice-2016-speaker-recognition-system-1611.00514</loc><lastmod>2016-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-intelligent-voice-2016-speaker-recognition-system-1611.00514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-intelligent-voice-2016-speaker-recognition-system-1611.00514"/></url>
<url><loc>https://scifaro.com/en/abs/frame-theory-for-signal-processing-in-psychoacoustics-1611.00966</loc><lastmod>2020-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-theory-for-signal-processing-in-psychoacoustics-1611.00966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-theory-for-signal-processing-in-psychoacoustics-1611.00966"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-speaker-localization-based-on-direct-path-features-and-likelihood-maximization-with-spatial-sparsity-regularization-1611.01172</loc><lastmod>2017-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-speaker-localization-based-on-direct-path-features-and-likelihood-maximization-with-spatial-sparsity-regularization-1611.01172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-speaker-localization-based-on-direct-path-features-and-likelihood-maximization-with-spatial-sparsity-regularization-1611.01172"/></url>
<url><loc>https://scifaro.com/en/abs/vr-space-opera-mimetic-spectralism-in-an-immersive-starlight-audification-system-1611.03081</loc><lastmod>2016-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vr-space-opera-mimetic-spectralism-in-an-immersive-starlight-audification-system-1611.03081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vr-space-opera-mimetic-spectralism-in-an-immersive-starlight-audification-system-1611.03081"/></url>
<url><loc>https://scifaro.com/en/abs/noise-reduction-combining-microphone-and-piezoelectric-device-1611.03178</loc><lastmod>2016-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-reduction-combining-microphone-and-piezoelectric-device-1611.03178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-reduction-combining-microphone-and-piezoelectric-device-1611.03178"/></url>
<url><loc>https://scifaro.com/en/abs/song-from-pi-a-musically-plausible-network-for-pop-music-generation-1611.03477</loc><lastmod>2016-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/song-from-pi-a-musically-plausible-network-for-pop-music-generation-1611.03477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/song-from-pi-a-musically-plausible-network-for-pop-music-generation-1611.03477"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-north-atlantic-right-whale-upcalls-using-local-binary-patterns-in-a-two-stage-strategy-1611.04947</loc><lastmod>2016-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-north-atlantic-right-whale-upcalls-using-local-binary-patterns-in-a-two-stage-strategy-1611.04947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-north-atlantic-right-whale-upcalls-using-local-binary-patterns-in-a-two-stage-strategy-1611.04947"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-tala-computationally-in-polyphonic-context-a-novel-approach-1611.05182</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-tala-computationally-in-polyphonic-context-a-novel-approach-1611.05182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-tala-computationally-in-polyphonic-context-a-novel-approach-1611.05182"/></url>
<url><loc>https://scifaro.com/en/abs/decision-based-transcription-of-jazz-guitar-solos-using-a-harmonic-bident-analysis-filter-bank-and-spectral-distribution-weighting-1611.06505</loc><lastmod>2016-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decision-based-transcription-of-jazz-guitar-solos-using-a-harmonic-bident-analysis-filter-bank-and-spectral-distribution-weighting-1611.06505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decision-based-transcription-of-jazz-guitar-solos-using-a-harmonic-bident-analysis-filter-bank-and-spectral-distribution-weighting-1611.06505"/></url>
<url><loc>https://scifaro.com/en/abs/momos-mt-mobile-monophonic-system-for-music-transcription-1611.07351</loc><lastmod>2016-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/momos-mt-mobile-monophonic-system-for-music-transcription-1611.07351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/momos-mt-mobile-monophonic-system-for-music-transcription-1611.07351"/></url>
<url><loc>https://scifaro.com/en/abs/fast-chirplet-transform-to-enhance-cnn-machine-listening-validation-on-animal-calls-and-speech-1611.08749</loc><lastmod>2017-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-chirplet-transform-to-enhance-cnn-machine-listening-validation-on-animal-calls-and-speech-1611.08749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-chirplet-transform-to-enhance-cnn-machine-listening-validation-on-animal-calls-and-speech-1611.08749"/></url>
<url><loc>https://scifaro.com/en/abs/siso-and-simo-accompaniment-cancellation-for-live-solo-recordings-based-on-short-time-erb-band-wiener-filtering-and-spectral-subtraction-1611.08905</loc><lastmod>2016-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siso-and-simo-accompaniment-cancellation-for-live-solo-recordings-based-on-short-time-erb-band-wiener-filtering-and-spectral-subtraction-1611.08905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siso-and-simo-accompaniment-cancellation-for-live-solo-recordings-based-on-short-time-erb-band-wiener-filtering-and-spectral-subtraction-1611.08905"/></url>
<url><loc>https://scifaro.com/en/abs/deep-attractor-network-for-single-microphone-speaker-separation-1611.08930</loc><lastmod>2017-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-attractor-network-for-single-microphone-speaker-separation-1611.08930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-attractor-network-for-single-microphone-speaker-separation-1611.08930"/></url>
<url><loc>https://scifaro.com/en/abs/fast-wavenet-generation-algorithm-1611.09482</loc><lastmod>2016-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-wavenet-generation-algorithm-1611.09482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-wavenet-generation-algorithm-1611.09482"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-audio-pattern-using-convolutional-neural-network-from-raw-waveforms-1611.09524</loc><lastmod>2016-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-audio-pattern-using-convolutional-neural-network-from-raw-waveforms-1611.09524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-audio-pattern-using-convolutional-neural-network-from-raw-waveforms-1611.09524"/></url>
<url><loc>https://scifaro.com/en/abs/learning-filter-banks-using-deep-learning-for-acoustic-signals-1611.09526</loc><lastmod>2016-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-filter-banks-using-deep-learning-for-acoustic-signals-1611.09526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-filter-banks-using-deep-learning-for-acoustic-signals-1611.09526"/></url>
<url><loc>https://scifaro.com/en/abs/getting-closer-to-the-essence-of-music-the-con-espressione-manifesto-1611.09733</loc><lastmod>2016-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/getting-closer-to-the-essence-of-music-the-con-espressione-manifesto-1611.09733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/getting-closer-to-the-essence-of-music-the-con-espressione-manifesto-1611.09733"/></url>
<url><loc>https://scifaro.com/en/abs/a-non-linear-multifractal-study-to-illustrate-the-evolution-of-tagore-songs-over-a-century-1612.00171</loc><lastmod>2016-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-non-linear-multifractal-study-to-illustrate-the-evolution-of-tagore-songs-over-a-century-1612.00171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-non-linear-multifractal-study-to-illustrate-the-evolution-of-tagore-songs-over-a-century-1612.00171"/></url>
<url><loc>https://scifaro.com/en/abs/a-non-linear-approach-towards-automated-emotion-analysis-in-hindustani-music-1612.00172</loc><lastmod>2016-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-non-linear-approach-towards-automated-emotion-analysis-in-hindustani-music-1612.00172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-non-linear-approach-towards-automated-emotion-analysis-in-hindustani-music-1612.00172"/></url>
<url><loc>https://scifaro.com/en/abs/frida-fri-based-doa-estimation-for-arbitrary-array-layouts-1612.00876</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frida-fri-based-doa-estimation-for-arbitrary-array-layouts-1612.00876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frida-fri-based-doa-estimation-for-arbitrary-array-layouts-1612.00876"/></url>
<url><loc>https://scifaro.com/en/abs/fma-a-dataset-for-music-analysis-1612.01840</loc><lastmod>2017-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fma-a-dataset-for-music-analysis-1612.01840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fma-a-dataset-for-music-analysis-1612.01840"/></url>
<url><loc>https://scifaro.com/en/abs/an-algorithm-to-assign-musical-prime-commas-to-every-prime-number-and-construct-a-universal-and-compact-free-just-intonation-musical-notation-1612.01860</loc><lastmod>2017-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-algorithm-to-assign-musical-prime-commas-to-every-prime-number-and-construct-a-universal-and-compact-free-just-intonation-musical-notation-1612.01860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-algorithm-to-assign-musical-prime-commas-to-every-prime-number-and-construct-a-universal-and-compact-free-just-intonation-musical-notation-1612.01860"/></url>
<url><loc>https://scifaro.com/en/abs/segmental-convolutional-neural-networks-for-detection-of-cardiac-abnormality-with-noisy-heart-sound-recordings-1612.01943</loc><lastmod>2016-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segmental-convolutional-neural-networks-for-detection-of-cardiac-abnormality-with-noisy-heart-sound-recordings-1612.01943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segmental-convolutional-neural-networks-for-detection-of-cardiac-abnormality-with-noisy-heart-sound-recordings-1612.01943"/></url>
<url><loc>https://scifaro.com/en/abs/towards-computer-assisted-understanding-of-dynamics-in-symphonic-music-1612.02198</loc><lastmod>2016-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-computer-assisted-understanding-of-dynamics-in-symphonic-music-1612.02198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-computer-assisted-understanding-of-dynamics-in-symphonic-music-1612.02198"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-networks-for-passive-monitoring-of-a-shallow-water-environment-using-a-single-sensor-1612.03505</loc><lastmod>2017-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-networks-for-passive-monitoring-of-a-shallow-water-environment-using-a-single-sensor-1612.03505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-networks-for-passive-monitoring-of-a-shallow-water-environment-using-a-single-sensor-1612.03505"/></url>
<url><loc>https://scifaro.com/en/abs/a-unit-selection-methodology-for-music-generation-using-deep-neural-networks-1612.03789</loc><lastmod>2016-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unit-selection-methodology-for-music-generation-using-deep-neural-networks-1612.03789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unit-selection-methodology-for-music-generation-using-deep-neural-networks-1612.03789"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-dctnet-for-audio-signal-classification-1612.04028</loc><lastmod>2017-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-dctnet-for-audio-signal-classification-1612.04028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-dctnet-for-audio-signal-classification-1612.04028"/></url>
<url><loc>https://scifaro.com/en/abs/joint-bayesian-gaussian-discriminant-analysis-for-speaker-verification-1612.04056</loc><lastmod>2017-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-bayesian-gaussian-discriminant-analysis-for-speaker-verification-1612.04056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-bayesian-gaussian-discriminant-analysis-for-speaker-verification-1612.04056"/></url>
<url><loc>https://scifaro.com/en/abs/imposing-higher-level-structure-in-polyphonic-music-generation-using-convolutional-restricted-boltzmann-machines-and-constraints-1612.04742</loc><lastmod>2018-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imposing-higher-level-structure-in-polyphonic-music-generation-using-convolutional-restricted-boltzmann-machines-and-constraints-1612.04742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imposing-higher-level-structure-in-polyphonic-music-generation-using-convolutional-restricted-boltzmann-machines-and-constraints-1612.04742"/></url>
<url><loc>https://scifaro.com/en/abs/combination-of-linear-prediction-and-phase-decomposition-for-glottal-source-analysis-on-voiced-speech-1612.04919</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combination-of-linear-prediction-and-phase-decomposition-for-glottal-source-analysis-on-voiced-speech-1612.04919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combination-of-linear-prediction-and-phase-decomposition-for-glottal-source-analysis-on-voiced-speech-1612.04919"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-with-deep-learning-1612.04928</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-with-deep-learning-1612.04928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-with-deep-learning-1612.04928"/></url>
<url><loc>https://scifaro.com/en/abs/feature-learning-for-chord-recognition-the-deep-chroma-extractor-1612.05065</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-learning-for-chord-recognition-the-deep-chroma-extractor-1612.05065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-learning-for-chord-recognition-the-deep-chroma-extractor-1612.05065"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-audio-sheet-music-retrieval-1612.05070</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-audio-sheet-music-retrieval-1612.05070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-audio-sheet-music-retrieval-1612.05070"/></url>
<url><loc>https://scifaro.com/en/abs/live-score-following-on-sheet-music-images-1612.05076</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/live-score-following-on-sheet-music-images-1612.05076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/live-score-following-on-sheet-music-images-1612.05076"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-potential-of-simple-framewise-approaches-to-piano-transcription-1612.05153</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-potential-of-simple-framewise-approaches-to-piano-transcription-1612.05153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-potential-of-simple-framewise-approaches-to-piano-transcription-1612.05153"/></url>
<url><loc>https://scifaro.com/en/abs/a-phase-vocoder-based-on-nonstationary-gabor-frames-1612.05156</loc><lastmod>2017-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-phase-vocoder-based-on-nonstationary-gabor-frames-1612.05156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-phase-vocoder-based-on-nonstationary-gabor-frames-1612.05156"/></url>
<url><loc>https://scifaro.com/en/abs/lia-system-description-for-nist-sre-2016-1612.05168</loc><lastmod>2016-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lia-system-description-for-nist-sre-2016-1612.05168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lia-system-description-for-nist-sre-2016-1612.05168"/></url>
<url><loc>https://scifaro.com/en/abs/neural-networks-based-eeg-speech-models-1612.05369</loc><lastmod>2017-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-networks-based-eeg-speech-models-1612.05369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-networks-based-eeg-speech-models-1612.05369"/></url>
<url><loc>https://scifaro.com/en/abs/basis-function-modeling-of-loudness-variations-in-ensemble-performance-1612.05432</loc><lastmod>2016-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/basis-function-modeling-of-loudness-variations-in-ensemble-performance-1612.05432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/basis-function-modeling-of-loudness-variations-in-ensemble-performance-1612.05432"/></url>
<url><loc>https://scifaro.com/en/abs/on-bird-sound-recordings-automatic-acoustic-recognition-of-activities-and-contexts-1612.05489</loc><lastmod>2016-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-bird-sound-recordings-automatic-acoustic-recognition-of-activities-and-contexts-1612.05489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-bird-sound-recordings-automatic-acoustic-recognition-of-activities-and-contexts-1612.05489"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-based-two-dimensional-robust-least-squares-frequency-invariant-beamformer-design-for-robot-audition-1612.06151</loc><lastmod>2017-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-based-two-dimensional-robust-least-squares-frequency-invariant-beamformer-design-for-robot-audition-1612.06151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-based-two-dimensional-robust-least-squares-frequency-invariant-beamformer-design-for-robot-audition-1612.06151"/></url>
<url><loc>https://scifaro.com/en/abs/vast-the-virtual-acoustic-space-traveler-dataset-1612.06287</loc><lastmod>2016-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vast-the-virtual-acoustic-space-traveler-dataset-1612.06287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vast-the-virtual-acoustic-space-traveler-dataset-1612.06287"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-target-activity-detection-based-on-recurrent-neural-networks-1612.06642</loc><lastmod>2016-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-target-activity-detection-based-on-recurrent-neural-networks-1612.06642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-target-activity-detection-based-on-recurrent-neural-networks-1612.06642"/></url>
<url><loc>https://scifaro.com/en/abs/robustness-of-voice-conversion-techniques-under-mismatched-conditions-1612.07523</loc><lastmod>2016-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustness-of-voice-conversion-techniques-under-mismatched-conditions-1612.07523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustness-of-voice-conversion-techniques-under-mismatched-conditions-1612.07523"/></url>
<url><loc>https://scifaro.com/en/abs/samplernn-an-unconditional-end-to-end-neural-audio-generation-model-1612.07837</loc><lastmod>2017-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samplernn-an-unconditional-end-to-end-neural-audio-generation-model-1612.07837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samplernn-an-unconditional-end-to-end-neural-audio-generation-model-1612.07837"/></url>
<url><loc>https://scifaro.com/en/abs/what-makes-audio-event-detection-harder-than-classification-1612.09089</loc><lastmod>2018-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-makes-audio-event-detection-harder-than-classification-1612.09089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-makes-audio-event-detection-harder-than-classification-1612.09089"/></url>
<url><loc>https://scifaro.com/en/abs/phase-incorporating-speech-enhancement-based-on-complex-valued-gaussian-process-latent-variable-model-1612.09150</loc><lastmod>2017-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-incorporating-speech-enhancement-based-on-complex-valued-gaussian-process-latent-variable-model-1612.09150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-incorporating-speech-enhancement-based-on-complex-valued-gaussian-process-latent-variable-model-1612.09150"/></url>
<url><loc>https://scifaro.com/en/abs/on-higher-order-positive-differential-energy-operator-1701.03834</loc><lastmod>2017-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-higher-order-positive-differential-energy-operator-1701.03834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-higher-order-positive-differential-energy-operator-1701.03834"/></url>
<url><loc>https://scifaro.com/en/abs/empirical-study-of-drone-sound-detection-in-real-life-environment-with-deep-neural-networks-1701.05779</loc><lastmod>2017-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empirical-study-of-drone-sound-detection-in-real-life-environment-with-deep-neural-networks-1701.05779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empirical-study-of-drone-sound-detection-in-real-life-environment-with-deep-neural-networks-1701.05779"/></url>
<url><loc>https://scifaro.com/en/abs/lyrics-to-audio-alignment-by-unsupervised-discovery-of-repetitive-patterns-in-vowel-acoustics-1701.06078</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lyrics-to-audio-alignment-by-unsupervised-discovery-of-repetitive-patterns-in-vowel-acoustics-1701.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lyrics-to-audio-alignment-by-unsupervised-discovery-of-repetitive-patterns-in-vowel-acoustics-1701.06078"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-on-bengali-phoneme-recognition-1701.08156</loc><lastmod>2018-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-bengali-phoneme-recognition-1701.08156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-bengali-phoneme-recognition-1701.08156"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-analysis-of-the-entanglement-problem-in-neural-network-based-music-transcription-systems-1702.00025</loc><lastmod>2017-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-analysis-of-the-entanglement-problem-in-neural-network-based-music-transcription-systems-1702.00025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-analysis-of-the-entanglement-problem-in-neural-network-based-music-transcription-systems-1702.00025"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-futility-of-learning-complex-frame-level-language-models-for-chord-recognition-1702.00178</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-futility-of-learning-complex-frame-level-language-models-for-chord-recognition-1702.00178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-futility-of-learning-complex-frame-level-language-models-for-chord-recognition-1702.00178"/></url>
<url><loc>https://scifaro.com/en/abs/ku-ispl-speaker-recognition-systems-under-language-mismatch-condition-for-nist-2016-speaker-recognition-evaluation-1702.00956</loc><lastmod>2017-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ku-ispl-speaker-recognition-systems-under-language-mismatch-condition-for-nist-2016-speaker-recognition-evaluation-1702.00956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ku-ispl-speaker-recognition-systems-under-language-mismatch-condition-for-nist-2016-speaker-recognition-evaluation-1702.00956"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-voice-utterance-with-aging-factor-using-the-method-of-mfcc-multichannel-1702.01999</loc><lastmod>2017-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-voice-utterance-with-aging-factor-using-the-method-of-mfcc-multichannel-1702.01999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-voice-utterance-with-aging-factor-using-the-method-of-mfcc-multichannel-1702.01999"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-importance-of-temporal-context-in-proximity-kernels-a-vocal-separation-case-study-1702.02130</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-importance-of-temporal-context-in-proximity-kernels-a-vocal-separation-case-study-1702.02130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-importance-of-temporal-context-in-proximity-kernels-a-vocal-separation-case-study-1702.02130"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-change-detection-using-features-through-a-neural-network-speaker-classifier-1702.02285</loc><lastmod>2017-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-change-detection-using-features-through-a-neural-network-speaker-classifier-1702.02285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-change-detection-using-features-through-a-neural-network-speaker-classifier-1702.02285"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-speaker-classification-and-verification-systems-with-enhanced-features-1702.02289</loc><lastmod>2017-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-speaker-classification-and-verification-systems-with-enhanced-features-1702.02289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-speaker-classification-and-verification-systems-with-enhanced-features-1702.02289"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-filter-bank-cepstral-coefficients-for-spoofing-detection-1702.03791</loc><lastmod>2017-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-filter-bank-cepstral-coefficients-for-spoofing-detection-1702.03791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-filter-bank-cepstral-coefficients-for-spoofing-detection-1702.03791"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-linear-prediction-for-blind-reverberant-audio-source-separation-1702.07713</loc><lastmod>2017-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-linear-prediction-for-blind-reverberant-audio-source-separation-1702.07713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-linear-prediction-for-blind-reverberant-audio-source-separation-1702.07713"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-gated-recurrent-neural-network-incorporating-spatial-features-for-audio-tagging-1702.07787</loc><lastmod>2017-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-gated-recurrent-neural-network-incorporating-spatial-features-for-audio-tagging-1702.07787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-gated-recurrent-neural-network-incorporating-spatial-features-for-audio-tagging-1702.07787"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-model-and-its-inverse-of-an-audio-system-1703.00009</loc><lastmod>2017-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-model-and-its-inverse-of-an-audio-system-1703.00009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-model-and-its-inverse-of-an-audio-system-1703.00009"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-volterra-model-of-a-loudspeaker-behavior-based-on-laser-doppler-vibrometry-1703.00384</loc><lastmod>2017-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-volterra-model-of-a-loudspeaker-behavior-based-on-laser-doppler-vibrometry-1703.00384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-volterra-model-of-a-loudspeaker-behavior-based-on-laser-doppler-vibrometry-1703.00384"/></url>
<url><loc>https://scifaro.com/en/abs/sample-level-deep-convolutional-neural-networks-for-music-auto-tagging-using-raw-waveforms-1703.01789</loc><lastmod>2017-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-level-deep-convolutional-neural-networks-for-music-auto-tagging-using-raw-waveforms-1703.01789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-level-deep-convolutional-neural-networks-for-music-auto-tagging-using-raw-waveforms-1703.01789"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-bird-audio-detection-1703.02317</loc><lastmod>2017-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-bird-audio-detection-1703.02317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-bird-audio-detection-1703.02317"/></url>
<url><loc>https://scifaro.com/en/abs/linear-and-circular-microphone-array-for-remote-surveillance-simulated-performance-analysis-1703.02318</loc><lastmod>2017-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linear-and-circular-microphone-array-for-remote-surveillance-simulated-performance-analysis-1703.02318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linear-and-circular-microphone-array-for-remote-surveillance-simulated-performance-analysis-1703.02318"/></url>
<url><loc>https://scifaro.com/en/abs/audio-scene-classification-with-deep-recurrent-neural-networks-1703.04770</loc><lastmod>2017-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-scene-classification-with-deep-recurrent-neural-networks-1703.04770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-scene-classification-with-deep-recurrent-neural-networks-1703.04770"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-end-to-end-speech-recognition-1703.04783</loc><lastmod>2017-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-end-to-end-speech-recognition-1703.04783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-end-to-end-speech-recognition-1703.04783"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-importance-of-super-gaussian-speech-priors-for-machine-learning-based-speech-enhancement-1703.05003</loc><lastmod>2018-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-importance-of-super-gaussian-speech-priors-for-machine-learning-based-speech-enhancement-1703.05003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-importance-of-super-gaussian-speech-priors-for-machine-learning-based-speech-enhancement-1703.05003"/></url>
<url><loc>https://scifaro.com/en/abs/deducing-the-severity-of-psychiatric-symptoms-from-the-human-voice-1703.05344</loc><lastmod>2017-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deducing-the-severity-of-psychiatric-symptoms-from-the-human-voice-1703.05344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deducing-the-severity-of-psychiatric-symptoms-from-the-human-voice-1703.05344"/></url>
<url><loc>https://scifaro.com/en/abs/attention-and-localization-based-on-a-deep-convolutional-recurrent-model-for-weakly-supervised-audio-tagging-1703.06052</loc><lastmod>2017-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-and-localization-based-on-a-deep-convolutional-recurrent-model-for-weakly-supervised-audio-tagging-1703.06052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-and-localization-based-on-a-deep-convolutional-recurrent-model-for-weakly-supervised-audio-tagging-1703.06052"/></url>
<url><loc>https://scifaro.com/en/abs/multi-talker-speech-separation-with-utterance-level-permutation-invariant-training-of-deep-recurrent-neural-networks-1703.06284</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-talker-speech-separation-with-utterance-level-permutation-invariant-training-of-deep-recurrent-neural-networks-1703.06284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-talker-speech-separation-with-utterance-level-permutation-invariant-training-of-deep-recurrent-neural-networks-1703.06284"/></url>
<url><loc>https://scifaro.com/en/abs/gestalt-phenomenon-in-music-a-neurocognitive-physics-study-with-eeg-1703.06491</loc><lastmod>2017-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gestalt-phenomenon-in-music-a-neurocognitive-physics-study-with-eeg-1703.06491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gestalt-phenomenon-in-music-a-neurocognitive-physics-study-with-eeg-1703.06491"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-analysis-of-music-audio-signals-with-convolutional-neural-networks-1703.06697</loc><lastmod>2017-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-analysis-of-music-audio-signals-with-convolutional-neural-networks-1703.06697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-analysis-of-music-audio-signals-with-convolutional-neural-networks-1703.06697"/></url>
<url><loc>https://scifaro.com/en/abs/a-heuristic-approach-to-obtain-signal-envelope-with-a-simple-software-implementation-1703.06812</loc><lastmod>2019-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-heuristic-approach-to-obtain-signal-envelope-with-a-simple-software-implementation-1703.06812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-heuristic-approach-to-obtain-signal-envelope-with-a-simple-software-implementation-1703.06812"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-deep-learning-methods-for-environmental-sound-1703.06902</loc><lastmod>2017-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-deep-learning-methods-for-environmental-sound-1703.06902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-deep-learning-methods-for-environmental-sound-1703.06902"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-multi-class-audio-classification-in-noisy-in-vehicle-environment-1703.07065</loc><lastmod>2018-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-multi-class-audio-classification-in-noisy-in-vehicle-environment-1703.07065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-multi-class-audio-classification-in-noisy-in-vehicle-environment-1703.07065"/></url>
<url><loc>https://scifaro.com/en/abs/multi-objective-learning-and-mask-based-post-processing-for-deep-neural-network-based-speech-enhancement-1703.07172</loc><lastmod>2017-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-objective-learning-and-mask-based-post-processing-for-deep-neural-network-based-speech-enhancement-1703.07172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-objective-learning-and-mask-based-post-processing-for-deep-neural-network-based-speech-enhancement-1703.07172"/></url>
<url><loc>https://scifaro.com/en/abs/gate-activation-signal-analysis-for-gated-recurrent-neural-networks-and-its-correlation-with-phoneme-boundaries-1703.07588</loc><lastmod>2017-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gate-activation-signal-analysis-for-gated-recurrent-neural-networks-and-its-correlation-with-phoneme-boundaries-1703.07588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gate-activation-signal-analysis-for-gated-recurrent-neural-networks-and-its-correlation-with-phoneme-boundaries-1703.07588"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-audio-source-separation-using-convolutional-denoising-autoencoders-1703.08019</loc><lastmod>2017-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-audio-source-separation-using-convolutional-denoising-autoencoders-1703.08019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-audio-source-separation-using-convolutional-denoising-autoencoders-1703.08019"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-a-deep-mixture-of-experts-1703.09302</loc><lastmod>2017-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-a-deep-mixture-of-experts-1703.09302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-a-deep-mixture-of-experts-1703.09302"/></url>
<url><loc>https://scifaro.com/en/abs/midinet-a-convolutional-generative-adversarial-network-for-symbolic-domain-music-generation-1703.10847</loc><lastmod>2017-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midinet-a-convolutional-generative-adversarial-network-for-symbolic-domain-music-generation-1703.10847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midinet-a-convolutional-generative-adversarial-network-for-symbolic-domain-music-generation-1703.10847"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1703.10893</loc><lastmod>2018-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1703.10893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1703.10893"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-the-problem-of-audio-based-hit-song-prediction-using-convolutional-neural-networks-1704.01280</loc><lastmod>2017-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-the-problem-of-audio-based-hit-song-prediction-using-convolutional-neural-networks-1704.01280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-the-problem-of-audio-based-hit-song-prediction-using-convolutional-neural-networks-1704.01280"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-multi-talker-speech-with-permutation-invariant-training-1704.01985</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-multi-talker-speech-with-permutation-invariant-training-1704.01985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-multi-talker-speech-with-permutation-invariant-training-1704.01985"/></url>
<url><loc>https://scifaro.com/en/abs/obtain-real-time-beat-tracking-in-audio-signals-1704.02216</loc><lastmod>2017-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/obtain-real-time-beat-tracking-in-audio-signals-1704.02216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/obtain-real-time-beat-tracking-in-audio-signals-1704.02216"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-using-sequence-to-sequence-learning-of-context-posterior-probabilities-1704.02360</loc><lastmod>2017-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-using-sequence-to-sequence-learning-of-context-posterior-probabilities-1704.02360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-using-sequence-to-sequence-learning-of-context-posterior-probabilities-1704.02360"/></url>
<url><loc>https://scifaro.com/en/abs/time-contrastive-learning-based-dnn-bottleneck-features-for-text-dependent-speaker-verification-1704.02373</loc><lastmod>2019-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-contrastive-learning-based-dnn-bottleneck-features-for-text-dependent-speaker-verification-1704.02373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-contrastive-learning-based-dnn-bottleneck-features-for-text-dependent-speaker-verification-1704.02373"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-based-speech-parameter-generation-using-moment-matching-networks-1704.03626</loc><lastmod>2017-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-based-speech-parameter-generation-using-moment-matching-networks-1704.03626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-based-speech-parameter-generation-using-moment-matching-networks-1704.03626"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-parametric-singing-synthesizer-1704.03809</loc><lastmod>2017-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-parametric-singing-synthesizer-1704.03809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-parametric-singing-synthesizer-1704.03809"/></url>
<url><loc>https://scifaro.com/en/abs/i-vector-used-in-speaker-identification-by-dimension-compactness-1704.03934</loc><lastmod>2017-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-vector-used-in-speaker-identification-by-dimension-compactness-1704.03934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-vector-used-in-speaker-identification-by-dimension-compactness-1704.03934"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-by-gmm-based-i-vector-1704.03939</loc><lastmod>2017-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-by-gmm-based-i-vector-1704.03939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-by-gmm-based-i-vector-1704.03939"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-virtual-acoustics-on-dynamic-auditory-distance-perception-1704.06008</loc><lastmod>2017-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-virtual-acoustics-on-dynamic-auditory-distance-perception-1704.06008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-virtual-acoustics-on-dynamic-auditory-distance-perception-1704.06008"/></url>
<url><loc>https://scifaro.com/en/abs/using-speech-technology-for-quantifying-behavioral-characteristics-in-peer-led-team-learning-sessions-1704.07274</loc><lastmod>2017-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-speech-technology-for-quantifying-behavioral-characteristics-in-peer-led-team-learning-sessions-1704.07274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-speech-technology-for-quantifying-behavioral-characteristics-in-peer-led-team-learning-sessions-1704.07274"/></url>
<url><loc>https://scifaro.com/en/abs/design-of-robust-two-dimensional-polynomial-beamformers-as-a-convex-optimization-problem-with-application-to-robot-audition-1704.08953</loc><lastmod>2017-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-of-robust-two-dimensional-polynomial-beamformers-as-a-convex-optimization-problem-with-application-to-robot-audition-1704.08953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-of-robust-two-dimensional-polynomial-beamformers-as-a-convex-optimization-problem-with-application-to-robot-audition-1704.08953"/></url>
<url><loc>https://scifaro.com/en/abs/broadband-doa-estimation-using-convolutional-neural-networks-trained-with-noise-signals-1705.00919</loc><lastmod>2019-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/broadband-doa-estimation-using-convolutional-neural-networks-trained-with-noise-signals-1705.00919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/broadband-doa-estimation-using-convolutional-neural-networks-trained-with-noise-signals-1705.00919"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-temporal-constraints-for-a-system-of-interactive-scores-1705.01651</loc><lastmod>2017-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-temporal-constraints-for-a-system-of-interactive-scores-1705.01651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-temporal-constraints-for-a-system-of-interactive-scores-1705.01651"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-source-separation-with-adaptive-front-ends-1705.02514</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-source-separation-with-adaptive-front-ends-1705.02514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-source-separation-with-adaptive-front-ends-1705.02514"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speaker-feature-learning-for-text-independent-speaker-verification-1705.03670</loc><lastmod>2017-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speaker-feature-learning-for-text-independent-speaker-verification-1705.03670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speaker-feature-learning-for-text-independent-speaker-verification-1705.03670"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-singular-value-decomposition-for-efficient-spatial-audio-coding-1705.03877</loc><lastmod>2017-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-singular-value-decomposition-for-efficient-spatial-audio-coding-1705.03877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-singular-value-decomposition-for-efficient-spatial-audio-coding-1705.03877"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-audio-speaker-separation-with-source-contrastive-estimation-1705.04662</loc><lastmod>2017-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-audio-speaker-separation-with-source-contrastive-estimation-1705.04662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-audio-speaker-separation-with-source-contrastive-estimation-1705.04662"/></url>
<url><loc>https://scifaro.com/en/abs/riddim-a-rhythm-analysis-and-decomposition-tool-based-on-independent-subspace-analysis-1705.04792</loc><lastmod>2017-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/riddim-a-rhythm-analysis-and-decomposition-tool-based-on-independent-subspace-analysis-1705.04792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/riddim-a-rhythm-analysis-and-decomposition-tool-based-on-independent-subspace-analysis-1705.04792"/></url>
<url><loc>https://scifaro.com/en/abs/musical-instrument-recognition-using-their-distinctive-characteristics-in-artificial-neural-networks-1705.04971</loc><lastmod>2017-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-instrument-recognition-using-their-distinctive-characteristics-in-artificial-neural-networks-1705.04971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-instrument-recognition-using-their-distinctive-characteristics-in-artificial-neural-networks-1705.04971"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-the-latent-embedding-of-music-using-deep-neural-network-1705.05229</loc><lastmod>2017-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-the-latent-embedding-of-music-using-deep-neural-network-1705.05229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-the-latent-embedding-of-music-using-deep-neural-network-1705.05229"/></url>
<url><loc>https://scifaro.com/en/abs/texture-features-for-the-reproduction-of-the-perceptual-organization-of-sound-1705.05271</loc><lastmod>2017-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/texture-features-for-the-reproduction-of-the-perceptual-organization-of-sound-1705.05271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/texture-features-for-the-reproduction-of-the-perceptual-organization-of-sound-1705.05271"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-midi-a-painless-tutorial-on-midi-format-1705.05322</loc><lastmod>2018-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-midi-a-painless-tutorial-on-midi-format-1705.05322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-midi-a-painless-tutorial-on-midi-format-1705.05322"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-with-variational-recurrent-autoencoder-supported-by-history-1705.05458</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-with-variational-recurrent-autoencoder-supported-by-history-1705.05458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-with-variational-recurrent-autoencoder-supported-by-history-1705.05458"/></url>
<url><loc>https://scifaro.com/en/abs/a-biomimetic-vocalisation-system-for-miro-1705.05472</loc><lastmod>2017-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-biomimetic-vocalisation-system-for-miro-1705.05472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-biomimetic-vocalisation-system-for-miro-1705.05472"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-or-time-scale-representation-fission-and-fusion-rules-1705.05874</loc><lastmod>2017-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-or-time-scale-representation-fission-and-fusion-rules-1705.05874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-or-time-scale-representation-fission-and-fusion-rules-1705.05874"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-subset-selection-for-mvdr-beamformer-based-noise-reduction-1705.08255</loc><lastmod>2017-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-subset-selection-for-mvdr-beamformer-based-noise-reduction-1705.08255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-subset-selection-for-mvdr-beamformer-based-noise-reduction-1705.08255"/></url>
<url><loc>https://scifaro.com/en/abs/matrix-of-polynomials-model-based-polynomial-dictionary-learning-method-for-acoustic-impulse-response-modeling-1705.08660</loc><lastmod>2017-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matrix-of-polynomials-model-based-polynomial-dictionary-learning-method-for-acoustic-impulse-response-modeling-1705.08660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matrix-of-polynomials-model-based-polynomial-dictionary-learning-method-for-acoustic-impulse-response-modeling-1705.08660"/></url>
<url><loc>https://scifaro.com/en/abs/audio-replay-attack-detection-countermeasures-1705.08858</loc><lastmod>2017-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-replay-attack-detection-countermeasures-1705.08858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-replay-attack-detection-countermeasures-1705.08858"/></url>
<url><loc>https://scifaro.com/en/abs/anti-spoofing-methods-for-automatic-speakerverification-system-1705.08865</loc><lastmod>2017-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anti-spoofing-methods-for-automatic-speakerverification-system-1705.08865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anti-spoofing-methods-for-automatic-speakerverification-system-1705.08865"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-using-vae-for-i-vector-speaker-verification-1705.09185</loc><lastmod>2017-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-using-vae-for-i-vector-speaker-verification-1705.09185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-using-vae-for-i-vector-speaker-verification-1705.09185"/></url>
<url><loc>https://scifaro.com/en/abs/improved-i-vector-based-speaker-recognition-for-utterances-with-speaker-generated-non-speech-sounds-1705.09289</loc><lastmod>2017-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-i-vector-based-speaker-recognition-for-utterances-with-speaker-generated-non-speech-sounds-1705.09289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-i-vector-based-speaker-recognition-for-utterances-with-speaker-generated-non-speech-sounds-1705.09289"/></url>
<url><loc>https://scifaro.com/en/abs/on-residual-cnn-in-text-dependent-speaker-verification-task-1705.10134</loc><lastmod>2017-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-residual-cnn-in-text-dependent-speaker-verification-task-1705.10134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-residual-cnn-in-text-dependent-speaker-verification-task-1705.10134"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-uncertainty-estimation-for-weighted-dnn-hmm-asr-1705.10368</loc><lastmod>2017-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-uncertainty-estimation-for-weighted-dnn-hmm-asr-1705.10368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-uncertainty-estimation-for-weighted-dnn-hmm-asr-1705.10368"/></url>
<url><loc>https://scifaro.com/en/abs/collaborative-deep-learning-for-speech-enhancement-a-run-time-model-selection-method-using-autoencoders-1705.10385</loc><lastmod>2017-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collaborative-deep-learning-for-speech-enhancement-a-run-time-model-selection-method-using-autoencoders-1705.10385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collaborative-deep-learning-for-speech-enhancement-a-run-time-model-selection-method-using-autoencoders-1705.10385"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-environmentally-robust-speech-recognition-an-overview-of-recent-developments-1705.10874</loc><lastmod>2018-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-environmentally-robust-speech-recognition-an-overview-of-recent-developments-1705.10874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-environmentally-robust-speech-recognition-an-overview-of-recent-developments-1705.10874"/></url>
<url><loc>https://scifaro.com/en/abs/feature-extraction-for-machine-learning-based-crackle-detection-in-lung-sounds-from-a-health-survey-1706.00005</loc><lastmod>2017-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-extraction-for-machine-learning-based-crackle-detection-in-lung-sounds-from-a-health-survey-1706.00005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-extraction-for-machine-learning-based-crackle-detection-in-lung-sounds-from-a-health-survey-1706.00005"/></url>
<url><loc>https://scifaro.com/en/abs/mixed-penalization-in-convolutive-nonnegative-matrix-factorization-for-blind-speech-dereverberation-1706.00114</loc><lastmod>2017-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixed-penalization-in-convolutive-nonnegative-matrix-factorization-for-blind-speech-dereverberation-1706.00114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixed-penalization-in-convolutive-nonnegative-matrix-factorization-for-blind-speech-dereverberation-1706.00114"/></url>
<url><loc>https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1706.01777</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1706.01777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1706.01777"/></url>
<url><loc>https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-bird-audio-detection-1706.02047</loc><lastmod>2017-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-bird-audio-detection-1706.02047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-bird-audio-detection-1706.02047"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-replay-attack-and-anti-spoofing-for-automatic-speaker-verification-1706.02101</loc><lastmod>2017-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-replay-attack-and-anti-spoofing-for-automatic-speaker-verification-1706.02101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-replay-attack-and-anti-spoofing-for-automatic-speaker-verification-1706.02101"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-using-spatial-features-and-convolutional-recurrent-neural-network-1706.02291</loc><lastmod>2017-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-using-spatial-features-and-convolutional-recurrent-neural-network-1706.02291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-using-spatial-features-and-convolutional-recurrent-neural-network-1706.02291"/></url>
<url><loc>https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-music-emotion-recognition-1706.02292</loc><lastmod>2017-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-music-emotion-recognition-1706.02292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stacked-convolutional-and-recurrent-neural-networks-for-music-emotion-recognition-1706.02292"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-spatial-and-harmonic-features-1706.02293</loc><lastmod>2017-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-spatial-and-harmonic-features-1706.02293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-spatial-and-harmonic-features-1706.02293"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-network-bottleneck-features-for-noise-robust-speaker-verification-1706.03397</loc><lastmod>2017-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-network-bottleneck-features-for-noise-robust-speaker-verification-1706.03397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-network-bottleneck-features-for-noise-robust-speaker-verification-1706.03397"/></url>
<url><loc>https://scifaro.com/en/abs/blind-multichannel-identification-and-equalization-for-dereverberation-and-noise-reduction-based-on-convolutive-transfer-function-1706.03652</loc><lastmod>2018-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-multichannel-identification-and-equalization-for-dereverberation-and-noise-reduction-based-on-convolutive-transfer-function-1706.03652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-multichannel-identification-and-equalization-for-dereverberation-and-noise-reduction-based-on-convolutive-transfer-function-1706.03652"/></url>
<url><loc>https://scifaro.com/en/abs/learning-and-evaluating-musical-features-with-deep-autoencoders-1706.04486</loc><lastmod>2017-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-and-evaluating-musical-features-with-deep-autoencoders-1706.04486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-and-evaluating-musical-features-with-deep-autoencoders-1706.04486"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-potential-of-pseudo-quadrature-mirror-filter-banks-in-music-source-separation-tasks-1706.04924</loc><lastmod>2017-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-potential-of-pseudo-quadrature-mirror-filter-banks-in-music-source-separation-tasks-1706.04924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-potential-of-pseudo-quadrature-mirror-filter-banks-in-music-source-separation-tasks-1706.04924"/></url>
<url><loc>https://scifaro.com/en/abs/sut-system-description-for-nist-sre-2016-1706.05077</loc><lastmod>2017-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sut-system-description-for-nist-sre-2016-1706.05077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sut-system-description-for-nist-sre-2016-1706.05077"/></url>
<url><loc>https://scifaro.com/en/abs/kapre-on-gpu-audio-preprocessing-layers-for-a-quick-implementation-of-deep-neural-network-models-with-keras-1706.05781</loc><lastmod>2017-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kapre-on-gpu-audio-preprocessing-layers-for-a-quick-implementation-of-deep-neural-network-models-with-keras-1706.05781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kapre-on-gpu-audio-preprocessing-layers-for-a-quick-implementation-of-deep-neural-network-models-with-keras-1706.05781"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-approach-with-multi-channel-i-vectors-and-convolutional-neural-networks-for-acoustic-scene-classification-1706.06525</loc><lastmod>2017-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-approach-with-multi-channel-i-vectors-and-convolutional-neural-networks-for-acoustic-scene-classification-1706.06525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-approach-with-multi-channel-i-vectors-and-convolutional-neural-networks-for-acoustic-scene-classification-1706.06525"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-and-multi-scale-feature-aggregation-using-sample-level-deep-convolutional-neural-networks-for-music-classification-1706.06810</loc><lastmod>2017-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-and-multi-scale-feature-aggregation-using-sample-level-deep-convolutional-neural-networks-for-music-classification-1706.06810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-and-multi-scale-feature-aggregation-using-sample-level-deep-convolutional-neural-networks-for-music-classification-1706.06810"/></url>
<url><loc>https://scifaro.com/en/abs/a-wavenet-for-speech-denoising-1706.07162</loc><lastmod>2018-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-wavenet-for-speech-denoising-1706.07162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-wavenet-for-speech-denoising-1706.07162"/></url>
<url><loc>https://scifaro.com/en/abs/a-universal-negative-group-delay-filter-for-the-prediction-of-band-limited-signals-1706.07326</loc><lastmod>2017-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-universal-negative-group-delay-filter-for-the-prediction-of-band-limited-signals-1706.07326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-universal-negative-group-delay-filter-for-the-prediction-of-band-limited-signals-1706.07326"/></url>
<url><loc>https://scifaro.com/en/abs/toward-faultless-content-based-playlists-generation-for-instrumentals-1706.07613</loc><lastmod>2017-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-faultless-content-based-playlists-generation-for-instrumentals-1706.07613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-faultless-content-based-playlists-generation-for-instrumentals-1706.07613"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-acoustic-modeling-by-weakly-supervised-multi-task-deep-learning-using-acoustic-tokens-discovered-from-unlabeled-data-1706.07793</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-acoustic-modeling-by-weakly-supervised-multi-task-deep-learning-using-acoustic-tokens-discovered-from-unlabeled-data-1706.07793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-acoustic-modeling-by-weakly-supervised-multi-task-deep-learning-using-acoustic-tokens-discovered-from-unlabeled-data-1706.07793"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speaker-verification-do-we-need-end-to-end-1706.07859</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speaker-verification-do-we-need-end-to-end-1706.07859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speaker-verification-do-we-need-end-to-end-1706.07859"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-with-cough-laugh-and-wei-1706.07860</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-with-cough-laugh-and-wei-1706.07860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-with-cough-laugh-and-wei-1706.07860"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-deep-feature-learning-1706.07861</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-deep-feature-learning-1706.07861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-deep-feature-learning-1706.07861"/></url>
<url><loc>https://scifaro.com/en/abs/a-variational-em-method-for-pole-zero-modeling-of-speech-with-mixed-block-sparse-and-gaussian-excitation-1706.07927</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variational-em-method-for-pole-zero-modeling-of-speech-with-mixed-block-sparse-and-gaussian-excitation-1706.07927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variational-em-method-for-pole-zero-modeling-of-speech-with-mixed-block-sparse-and-gaussian-excitation-1706.07927"/></url>
<url><loc>https://scifaro.com/en/abs/between-homomorphic-signal-processing-and-deep-neural-networks-constructing-deep-algorithms-for-polyphonic-music-transcription-1706.08231</loc><lastmod>2017-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/between-homomorphic-signal-processing-and-deep-neural-networks-constructing-deep-algorithms-for-polyphonic-music-transcription-1706.08231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/between-homomorphic-signal-processing-and-deep-neural-networks-constructing-deep-algorithms-for-polyphonic-music-transcription-1706.08231"/></url>
<url><loc>https://scifaro.com/en/abs/voxceleb-a-large-scale-speaker-identification-dataset-1706.08612</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxceleb-a-large-scale-speaker-identification-dataset-1706.08612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxceleb-a-large-scale-speaker-identification-dataset-1706.08612"/></url>
<url><loc>https://scifaro.com/en/abs/impulsive-sound-detection-by-a-novel-energy-formula-and-its-usage-for-gunshot-recognition-1706.08759</loc><lastmod>2017-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impulsive-sound-detection-by-a-novel-energy-formula-and-its-usage-for-gunshot-recognition-1706.08759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impulsive-sound-detection-by-a-novel-energy-formula-and-its-usage-for-gunshot-recognition-1706.08759"/></url>
<url><loc>https://scifaro.com/en/abs/gabor-frames-and-deep-scattering-networks-in-audio-processing-1706.08818</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gabor-frames-and-deep-scattering-networks-in-audio-processing-1706.08818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gabor-frames-and-deep-scattering-networks-in-audio-processing-1706.08818"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-modeling-using-a-shallow-cnn-htsvm-architecture-1706.09055</loc><lastmod>2017-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-modeling-using-a-shallow-cnn-htsvm-architecture-1706.09055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-modeling-using-a-shallow-cnn-htsvm-architecture-1706.09055"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-musical-context-with-word2vec-1706.09088</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-musical-context-with-word2vec-1706.09088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-musical-context-with-word2vec-1706.09088"/></url>
<url><loc>https://scifaro.com/en/abs/on-a-novel-speech-representation-using-multitapered-modified-group-delay-function-1706.09386</loc><lastmod>2018-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-a-novel-speech-representation-using-multitapered-modified-group-delay-function-1706.09386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-a-novel-speech-representation-using-multitapered-modified-group-delay-function-1706.09386"/></url>
<url><loc>https://scifaro.com/en/abs/toward-inverse-control-of-physics-based-sound-synthesis-1706.09551</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-inverse-control-of-physics-based-sound-synthesis-1706.09551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-inverse-control-of-physics-based-sound-synthesis-1706.09551"/></url>
<url><loc>https://scifaro.com/en/abs/chord-label-personalization-through-deep-learning-of-integrated-harmonic-interval-based-representations-1706.09552</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-label-personalization-through-deep-learning-of-integrated-harmonic-interval-based-representations-1706.09552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-label-personalization-through-deep-learning-of-integrated-harmonic-interval-based-representations-1706.09552"/></url>
<url><loc>https://scifaro.com/en/abs/transforming-musical-signals-through-a-genre-classifying-convolutional-neural-network-1706.09553</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transforming-musical-signals-through-a-genre-classifying-convolutional-neural-network-1706.09553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transforming-musical-signals-through-a-genre-classifying-convolutional-neural-network-1706.09553"/></url>
<url><loc>https://scifaro.com/en/abs/music-signal-processing-using-vector-product-neural-networks-1706.09555</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-signal-processing-using-vector-product-neural-networks-1706.09555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-signal-processing-using-vector-product-neural-networks-1706.09555"/></url>
<url><loc>https://scifaro.com/en/abs/machine-listening-intelligence-1706.09557</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-listening-intelligence-1706.09557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-listening-intelligence-1706.09557"/></url>
<url><loc>https://scifaro.com/en/abs/talking-drums-generating-drum-grooves-with-neural-networks-1706.09558</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/talking-drums-generating-drum-grooves-with-neural-networks-1706.09558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/talking-drums-generating-drum-grooves-with-neural-networks-1706.09558"/></url>
<url><loc>https://scifaro.com/en/abs/audio-spectrogram-representations-for-processing-with-convolutional-neural-networks-1706.09559</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-spectrogram-representations-for-processing-with-convolutional-neural-networks-1706.09559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-spectrogram-representations-for-processing-with-convolutional-neural-networks-1706.09559"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-multi-band-densenets-for-audio-source-separation-1706.09588</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-multi-band-densenets-for-audio-source-separation-1706.09588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-multi-band-densenets-for-audio-source-separation-1706.09588"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-in-the-shouted-environment-using-suprasegmental-hidden-markov-models-1706.09691</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-in-the-shouted-environment-using-suprasegmental-hidden-markov-models-1706.09691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-in-the-shouted-environment-using-suprasegmental-hidden-markov-models-1706.09691"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speaker-identification-performance-under-the-shouted-talking-condition-using-second-order-circular-hidden-markov-models-1706.09716</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speaker-identification-performance-under-the-shouted-talking-condition-using-second-order-circular-hidden-markov-models-1706.09716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speaker-identification-performance-under-the-shouted-talking-condition-using-second-order-circular-hidden-markov-models-1706.09716"/></url>
<url><loc>https://scifaro.com/en/abs/employing-second-order-circular-suprasegmental-hidden-markov-models-to-enhance-speaker-identification-performance-in-shouted-talking-environments-1706.09722</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-second-order-circular-suprasegmental-hidden-markov-models-to-enhance-speaker-identification-performance-in-shouted-talking-environments-1706.09722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-second-order-circular-suprasegmental-hidden-markov-models-to-enhance-speaker-identification-performance-in-shouted-talking-environments-1706.09722"/></url>
<url><loc>https://scifaro.com/en/abs/talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-csphmm2s-1706.09729</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-csphmm2s-1706.09729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-csphmm2s-1706.09729"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-style-authentication-using-suprasegmental-hidden-markov-models-1706.09736</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-style-authentication-using-suprasegmental-hidden-markov-models-1706.09736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-style-authentication-using-suprasegmental-hidden-markov-models-1706.09736"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-investigation-and-analysis-in-unbiased-and-biased-emotional-talking-environments-1706.09754</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-investigation-and-analysis-in-unbiased-and-biased-emotional-talking-environments-1706.09754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-investigation-and-analysis-in-unbiased-and-biased-emotional-talking-environments-1706.09754"/></url>
<url><loc>https://scifaro.com/en/abs/using-second-order-hidden-markov-model-to-improve-speaker-identification-recognition-performance-under-neutral-condition-1706.09758</loc><lastmod>2017-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-second-order-hidden-markov-model-to-improve-speaker-identification-recognition-performance-under-neutral-condition-1706.09758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-second-order-hidden-markov-model-to-improve-speaker-identification-recognition-performance-under-neutral-condition-1706.09758"/></url>
<url><loc>https://scifaro.com/en/abs/employing-both-gender-and-emotion-cues-to-enhance-speaker-identification-performance-in-emotional-talking-environments-1706.09760</loc><lastmod>2017-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-both-gender-and-emotion-cues-to-enhance-speaker-identification-performance-in-emotional-talking-environments-1706.09760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-both-gender-and-emotion-cues-to-enhance-speaker-identification-performance-in-emotional-talking-environments-1706.09760"/></url>
<url><loc>https://scifaro.com/en/abs/automated-audio-captioning-with-recurrent-neural-networks-1706.10006</loc><lastmod>2017-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-audio-captioning-with-recurrent-neural-networks-1706.10006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-audio-captioning-with-recurrent-neural-networks-1706.10006"/></url>
<url><loc>https://scifaro.com/en/abs/employing-emotion-cues-to-verify-speakers-in-emotional-talking-environments-1707.00137</loc><lastmod>2017-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-emotion-cues-to-verify-speakers-in-emotional-talking-environments-1707.00137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-emotion-cues-to-verify-speakers-in-emotional-talking-environments-1707.00137"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-in-shouted-talking-environments-based-on-novel-third-order-hidden-markov-models-1707.00138</loc><lastmod>2017-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-in-shouted-talking-environments-based-on-novel-third-order-hidden-markov-models-1707.00138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-in-shouted-talking-environments-based-on-novel-third-order-hidden-markov-models-1707.00138"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-and-analyzing-the-vocal-tract-under-normal-and-stressful-talking-conditions-1707.00149</loc><lastmod>2017-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-and-analyzing-the-vocal-tract-under-normal-and-stressful-talking-conditions-1707.00149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-and-analyzing-the-vocal-tract-under-normal-and-stressful-talking-conditions-1707.00149"/></url>
<url><loc>https://scifaro.com/en/abs/an-augmented-lagrangian-method-for-piano-transcription-using-equal-loudness-thresholding-and-lstm-based-decoding-1707.00160</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-augmented-lagrangian-method-for-piano-transcription-using-equal-loudness-thresholding-and-lstm-based-decoding-1707.00160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-augmented-lagrangian-method-for-piano-transcription-using-equal-loudness-thresholding-and-lstm-based-decoding-1707.00160"/></url>
<url><loc>https://scifaro.com/en/abs/rank-1-constrained-multichannel-wiener-filter-for-speech-recognition-in-noisy-environments-1707.00201</loc><lastmod>2017-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rank-1-constrained-multichannel-wiener-filter-for-speech-recognition-in-noisy-environments-1707.00201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rank-1-constrained-multichannel-wiener-filter-for-speech-recognition-in-noisy-environments-1707.00201"/></url>
<url><loc>https://scifaro.com/en/abs/emirati-speaker-verification-based-on-hmm1s-hmm2s-and-hmm3s-1707.00276</loc><lastmod>2017-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emirati-speaker-verification-based-on-hmm1s-hmm2s-and-hmm3s-1707.00276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emirati-speaker-verification-based-on-hmm1s-hmm2s-and-hmm3s-1707.00276"/></url>
<url><loc>https://scifaro.com/en/abs/talking-condition-identification-using-second-order-hidden-markov-models-1707.00679</loc><lastmod>2017-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/talking-condition-identification-using-second-order-hidden-markov-models-1707.00679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/talking-condition-identification-using-second-order-hidden-markov-models-1707.00679"/></url>
<url><loc>https://scifaro.com/en/abs/studying-and-enhancing-talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-hmms-chmm2s-and-sphmms-1707.00680</loc><lastmod>2017-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/studying-and-enhancing-talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-hmms-chmm2s-and-sphmms-1707.00680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/studying-and-enhancing-talking-condition-recognition-in-stressful-and-emotional-talking-environments-based-on-hmms-chmm2s-and-sphmms-1707.00680"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-in-a-shouted-talking-environment-based-on-novel-third-order-circular-suprasegmental-hidden-markov-models-1707.00686</loc><lastmod>2017-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-in-a-shouted-talking-environment-based-on-novel-third-order-circular-suprasegmental-hidden-markov-models-1707.00686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-in-a-shouted-talking-environment-based-on-novel-third-order-circular-suprasegmental-hidden-markov-models-1707.00686"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-estimation-of-harmonic-tension-by-distributed-representation-of-chords-1707.00972</loc><lastmod>2017-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-estimation-of-harmonic-tension-by-distributed-representation-of-chords-1707.00972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-estimation-of-harmonic-tension-by-distributed-representation-of-chords-1707.00972"/></url>
<url><loc>https://scifaro.com/en/abs/hidden-markov-model-based-speech-enhancement-1707.01090</loc><lastmod>2017-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hidden-markov-model-based-speech-enhancement-1707.01090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hidden-markov-model-based-speech-enhancement-1707.01090"/></url>
<url><loc>https://scifaro.com/en/abs/pch2csd-an-application-for-converting-nord-modular-g2-patches-into-csound-code-1707.01653</loc><lastmod>2017-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pch2csd-an-application-for-converting-nord-modular-g2-patches-into-csound-code-1707.01653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pch2csd-an-application-for-converting-nord-modular-g2-patches-into-csound-code-1707.01653"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-generative-adversarial-networks-under-a-multi-task-learning-framework-1707.01670</loc><lastmod>2017-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-generative-adversarial-networks-under-a-multi-task-learning-framework-1707.01670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-using-generative-adversarial-networks-under-a-multi-task-learning-framework-1707.01670"/></url>
<url><loc>https://scifaro.com/en/abs/deep-cnn-framework-for-audio-event-recognition-using-weakly-labeled-web-data-1707.02530</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-cnn-framework-for-audio-event-recognition-using-weakly-labeled-web-data-1707.02530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-cnn-framework-for-audio-event-recognition-using-weakly-labeled-web-data-1707.02530"/></url>
<url><loc>https://scifaro.com/en/abs/model-based-speech-enhancement-in-the-modulation-domain-1707.02651</loc><lastmod>2018-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-based-speech-enhancement-in-the-modulation-domain-1707.02651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-based-speech-enhancement-in-the-modulation-domain-1707.02651"/></url>
<url><loc>https://scifaro.com/en/abs/feature-joint-state-posterior-estimation-in-factorial-speech-processing-models-using-deep-neural-networks-1707.02661</loc><lastmod>2017-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-joint-state-posterior-estimation-in-factorial-speech-processing-models-using-deep-neural-networks-1707.02661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-joint-state-posterior-estimation-in-factorial-speech-processing-models-using-deep-neural-networks-1707.02661"/></url>
<url><loc>https://scifaro.com/en/abs/score-informed-syllable-segmentation-for-a-cappella-singing-voice-with-convolutional-neural-networks-1707.03544</loc><lastmod>2017-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-informed-syllable-segmentation-for-a-cappella-singing-voice-with-convolutional-neural-networks-1707.03544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-informed-syllable-segmentation-for-a-cappella-singing-voice-with-convolutional-neural-networks-1707.03544"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-score-matching-by-combining-phonetic-and-duration-information-1707.03547</loc><lastmod>2017-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-score-matching-by-combining-phonetic-and-duration-information-1707.03547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-score-matching-by-combining-phonetic-and-duration-information-1707.03547"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-speech-separation-with-deep-attractor-network-1707.03634</loc><lastmod>2018-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-speech-separation-with-deep-attractor-network-1707.03634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-speech-separation-with-deep-attractor-network-1707.03634"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-multiple-features-and-modeling-methods-for-text-dependent-speaker-verification-1707.04373</loc><lastmod>2017-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-multiple-features-and-modeling-methods-for-text-dependent-speaker-verification-1707.04373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-multiple-features-and-modeling-methods-for-text-dependent-speaker-verification-1707.04373"/></url>
<url><loc>https://scifaro.com/en/abs/localization-of-sound-sources-in-a-room-with-one-microphone-1707.04504</loc><lastmod>2026-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-of-sound-sources-in-a-room-with-one-microphone-1707.04504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-of-sound-sources-in-a-room-with-one-microphone-1707.04504"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-abnormal-heart-sounds-using-deep-learning-1707.04642</loc><lastmod>2017-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-abnormal-heart-sounds-using-deep-learning-1707.04642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-abnormal-heart-sounds-using-deep-learning-1707.04642"/></url>
<url><loc>https://scifaro.com/en/abs/metrical-accent-aware-vocal-onset-detection-in-polyphonic-audio-1707.06163</loc><lastmod>2017-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metrical-accent-aware-vocal-onset-detection-in-polyphonic-audio-1707.06163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metrical-accent-aware-vocal-onset-detection-in-polyphonic-audio-1707.06163"/></url>
<url><loc>https://scifaro.com/en/abs/from-bach-to-the-beatles-the-simulation-of-human-tonal-expectation-using-ecologically-trained-predictive-models-1707.06231</loc><lastmod>2017-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-bach-to-the-beatles-the-simulation-of-human-tonal-expectation-using-ecologically-trained-predictive-models-1707.06231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-bach-to-the-beatles-the-simulation-of-human-tonal-expectation-using-ecologically-trained-predictive-models-1707.06231"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-multi-talker-speech-recognition-with-permutation-invariant-training-1707.06527</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-multi-talker-speech-recognition-with-permutation-invariant-training-1707.06527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-multi-talker-speech-recognition-with-permutation-invariant-training-1707.06527"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-sequence-representations-for-acoustic-event-classification-1707.08729</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-sequence-representations-for-acoustic-event-classification-1707.08729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-sequence-representations-for-acoustic-event-classification-1707.08729"/></url>
<url><loc>https://scifaro.com/en/abs/bearing-fault-diagnosis-under-varying-working-condition-based-on-domain-adaptation-1707.09890</loc><lastmod>2017-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bearing-fault-diagnosis-under-varying-working-condition-based-on-domain-adaptation-1707.09890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bearing-fault-diagnosis-under-varying-working-condition-based-on-domain-adaptation-1707.09890"/></url>
<url><loc>https://scifaro.com/en/abs/a-breakthrough-in-speech-emotion-recognition-using-deep-retinal-convolution-neural-networks-1707.09917</loc><lastmod>2017-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-breakthrough-in-speech-emotion-recognition-using-deep-retinal-convolution-neural-networks-1707.09917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-breakthrough-in-speech-emotion-recognition-using-deep-retinal-convolution-neural-networks-1707.09917"/></url>
<url><loc>https://scifaro.com/en/abs/audio-super-resolution-using-neural-networks-1708.00853</loc><lastmod>2017-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-super-resolution-using-neural-networks-1708.00853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-super-resolution-using-neural-networks-1708.00853"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoder-based-domain-adaptation-for-speaker-recognition-under-insufficient-channel-information-1708.01227</loc><lastmod>2017-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoder-based-domain-adaptation-for-speaker-recognition-under-insufficient-channel-information-1708.01227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoder-based-domain-adaptation-for-speaker-recognition-under-insufficient-channel-information-1708.01227"/></url>
<url><loc>https://scifaro.com/en/abs/recursive-whitening-transformation-for-speaker-recognition-on-language-mismatched-condition-1708.01232</loc><lastmod>2017-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recursive-whitening-transformation-for-speaker-recognition-on-language-mismatched-condition-1708.01232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recursive-whitening-transformation-for-speaker-recognition-on-language-mismatched-condition-1708.01232"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-single-channel-speech-enhancement-with-modulation-domain-kalman-filtering-1708.02171</loc><lastmod>2017-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-single-channel-speech-enhancement-with-modulation-domain-kalman-filtering-1708.02171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-single-channel-speech-enhancement-with-modulation-domain-kalman-filtering-1708.02171"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-raga-recognition-in-hindustani-classical-music-1708.02322</loc><lastmod>2017-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-raga-recognition-in-hindustani-classical-music-1708.02322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-raga-recognition-in-hindustani-classical-music-1708.02322"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-using-deep-recurrent-convolutional-neural-networks-for-speaker-embeddings-1708.02840</loc><lastmod>2017-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-using-deep-recurrent-convolutional-neural-networks-for-speaker-embeddings-1708.02840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-using-deep-recurrent-convolutional-neural-networks-for-speaker-embeddings-1708.02840"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-and-cnn-with-weighted-and-multi-task-loss-functions-for-audio-event-detection-1708.03211</loc><lastmod>2017-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-and-cnn-with-weighted-and-multi-task-loss-functions-for-audio-event-detection-1708.03211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-and-cnn-with-weighted-and-multi-task-loss-functions-for-audio-event-detection-1708.03211"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-transfer-learning-based-non-linear-feature-extraction-for-acoustic-event-classification-1708.03465</loc><lastmod>2017-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-transfer-learning-based-non-linear-feature-extraction-for-acoustic-event-classification-1708.03465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-transfer-learning-based-non-linear-feature-extraction-for-acoustic-event-classification-1708.03465"/></url>
<url><loc>https://scifaro.com/en/abs/neural-translation-of-musical-style-1708.03535</loc><lastmod>2017-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-translation-of-musical-style-1708.03535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-translation-of-musical-style-1708.03535"/></url>
<url><loc>https://scifaro.com/en/abs/classical-music-composition-using-state-space-models-1708.03822</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classical-music-composition-using-state-space-models-1708.03822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classical-music-composition-using-state-space-models-1708.03822"/></url>
<url><loc>https://scifaro.com/en/abs/creating-an-a-cappella-singing-audio-dataset-for-automatic-jingju-singing-evaluation-research-1708.03986</loc><lastmod>2017-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-an-a-cappella-singing-audio-dataset-for-automatic-jingju-singing-evaluation-research-1708.03986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-an-a-cappella-singing-audio-dataset-for-automatic-jingju-singing-evaluation-research-1708.03986"/></url>
<url><loc>https://scifaro.com/en/abs/convolutive-audio-source-separation-using-robust-ica-and-an-intelligent-evolving-permutation-ambiguity-solution-1708.03989</loc><lastmod>2018-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutive-audio-source-separation-using-robust-ica-and-an-intelligent-evolving-permutation-ambiguity-solution-1708.03989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutive-audio-source-separation-using-robust-ica-and-an-intelligent-evolving-permutation-ambiguity-solution-1708.03989"/></url>
<url><loc>https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-complex-student-s-t-distribution-for-blind-audio-source-separation-1708.04795</loc><lastmod>2017-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-complex-student-s-t-distribution-for-blind-audio-source-separation-1708.04795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-complex-student-s-t-distribution-for-blind-audio-source-separation-1708.04795"/></url>
<url><loc>https://scifaro.com/en/abs/a-generalised-directional-laplacian-distribution-estimation-mixture-models-and-audio-source-separation-1708.04816</loc><lastmod>2017-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generalised-directional-laplacian-distribution-estimation-mixture-models-and-audio-source-separation-1708.04816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generalised-directional-laplacian-distribution-estimation-mixture-models-and-audio-source-separation-1708.04816"/></url>
<url><loc>https://scifaro.com/en/abs/underdetermined-source-separation-using-a-sparse-stft-framework-and-weighted-laplacian-directional-modelling-1708.04821</loc><lastmod>2017-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underdetermined-source-separation-using-a-sparse-stft-framework-and-weighted-laplacian-directional-modelling-1708.04821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underdetermined-source-separation-using-a-sparse-stft-framework-and-weighted-laplacian-directional-modelling-1708.04821"/></url>
<url><loc>https://scifaro.com/en/abs/an-instrumental-intelligibility-metric-based-on-information-theory-1708.05132</loc><lastmod>2018-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-instrumental-intelligibility-metric-based-on-information-theory-1708.05132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-instrumental-intelligibility-metric-based-on-information-theory-1708.05132"/></url>
<url><loc>https://scifaro.com/en/abs/learning-musical-relations-using-gated-autoencoders-1708.05325</loc><lastmod>2017-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-musical-relations-using-gated-autoencoders-1708.05325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-musical-relations-using-gated-autoencoders-1708.05325"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-of-deep-neural-networks-for-acoustic-scene-classification-1708.05826</loc><lastmod>2017-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-of-deep-neural-networks-for-acoustic-scene-classification-1708.05826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-of-deep-neural-networks-for-acoustic-scene-classification-1708.05826"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-audio-loss-function-for-deep-learning-1708.05987</loc><lastmod>2017-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-audio-loss-function-for-deep-learning-1708.05987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-audio-loss-function-for-deep-learning-1708.05987"/></url>
<url><loc>https://scifaro.com/en/abs/an-evaluation-of-intrusive-instrumental-intelligibility-metrics-1708.06027</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-evaluation-of-intrusive-instrumental-intelligibility-metrics-1708.06027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-evaluation-of-intrusive-instrumental-intelligibility-metrics-1708.06027"/></url>
<url><loc>https://scifaro.com/en/abs/bitwise-source-separation-on-hashed-spectra-an-efficient-posterior-estimation-scheme-using-partial-rank-order-metrics-1708.06750</loc><lastmod>2017-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bitwise-source-separation-on-hashed-spectra-an-efficient-posterior-estimation-scheme-using-partial-rank-order-metrics-1708.06750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bitwise-source-separation-on-hashed-spectra-an-efficient-posterior-estimation-scheme-using-partial-rank-order-metrics-1708.06750"/></url>
<url><loc>https://scifaro.com/en/abs/capturing-long-term-temporal-dependencies-with-convolutional-networks-for-continuous-emotion-recognition-1708.07050</loc><lastmod>2017-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capturing-long-term-temporal-dependencies-with-convolutional-networks-for-continuous-emotion-recognition-1708.07050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capturing-long-term-temporal-dependencies-with-convolutional-networks-for-continuous-emotion-recognition-1708.07050"/></url>
<url><loc>https://scifaro.com/en/abs/object-based-audio-rendering-1708.07218</loc><lastmod>2017-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/object-based-audio-rendering-1708.07218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/object-based-audio-rendering-1708.07218"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-speech-enhancement-method-based-on-weighted-prediction-error-and-dnn-for-dereverberation-and-denoising-1708.08251</loc><lastmod>2017-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-speech-enhancement-method-based-on-weighted-prediction-error-and-dnn-for-dereverberation-and-denoising-1708.08251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-speech-enhancement-method-based-on-weighted-prediction-error-and-dnn-for-dereverberation-and-denoising-1708.08251"/></url>
<url><loc>https://scifaro.com/en/abs/improving-source-separation-via-multi-speaker-representations-1708.08740</loc><lastmod>2017-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-source-separation-via-multi-speaker-representations-1708.08740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-source-separation-via-multi-speaker-representations-1708.08740"/></url>
<url><loc>https://scifaro.com/en/abs/joint-separation-and-denoising-of-noisy-multi-talker-speech-using-recurrent-neural-networks-and-permutation-invariant-training-1708.09588</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-separation-and-denoising-of-noisy-multi-talker-speech-using-recurrent-neural-networks-and-permutation-invariant-training-1708.09588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-separation-and-denoising-of-noisy-multi-talker-speech-using-recurrent-neural-networks-and-permutation-invariant-training-1708.09588"/></url>
<url><loc>https://scifaro.com/en/abs/2-3-4-harmony-within-the-tritave-1709.00375</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/2-3-4-harmony-within-the-tritave-1709.00375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/2-3-4-harmony-within-the-tritave-1709.00375"/></url>
<url><loc>https://scifaro.com/en/abs/surrey-cvssp-system-for-dcase2017-challenge-task4-1709.00551</loc><lastmod>2017-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surrey-cvssp-system-for-dcase2017-challenge-task4-1709.00551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surrey-cvssp-system-for-dcase2017-challenge-task4-1709.00551"/></url>
<url><loc>https://scifaro.com/en/abs/a-recurrent-encoder-decoder-approach-with-skip-filtering-connections-for-monaural-singing-voice-separation-1709.00611</loc><lastmod>2018-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-recurrent-encoder-decoder-approach-with-skip-filtering-connections-for-monaural-singing-voice-separation-1709.00611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-recurrent-encoder-decoder-approach-with-skip-filtering-connections-for-monaural-singing-voice-separation-1709.00611"/></url>
<url><loc>https://scifaro.com/en/abs/using-optimal-ratio-mask-as-training-target-for-supervised-speech-separation-1709.00917</loc><lastmod>2017-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-optimal-ratio-mask-as-training-target-for-supervised-speech-separation-1709.00917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-optimal-ratio-mask-as-training-target-for-supervised-speech-separation-1709.00917"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1709.00944</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1709.00944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-multimodal-deep-convolutional-neural-networks-1709.00944"/></url>
<url><loc>https://scifaro.com/en/abs/information-theoretic-analysis-of-dnn-hmm-acoustic-modeling-1709.01144</loc><lastmod>2017-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/information-theoretic-analysis-of-dnn-hmm-acoustic-modeling-1709.01144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/information-theoretic-analysis-of-dnn-hmm-acoustic-modeling-1709.01144"/></url>
<url><loc>https://scifaro.com/en/abs/psd-estimation-of-multiple-sound-sources-in-a-reverberant-room-using-a-spherical-microphone-array-1709.01346</loc><lastmod>2018-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psd-estimation-of-multiple-sound-sources-in-a-reverberant-room-using-a-spherical-microphone-array-1709.01346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psd-estimation-of-multiple-sound-sources-in-a-reverberant-room-using-a-spherical-microphone-array-1709.01346"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-techniques-for-music-generation-a-survey-1709.01620</loc><lastmod>2019-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-techniques-for-music-generation-a-survey-1709.01620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-techniques-for-music-generation-a-survey-1709.01620"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-audio-signal-preprocessing-methods-for-deep-neural-networks-on-music-tagging-1709.01922</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-audio-signal-preprocessing-methods-for-deep-neural-networks-on-music-tagging-1709.01922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-audio-signal-preprocessing-methods-for-deep-neural-networks-on-music-tagging-1709.01922"/></url>
<url><loc>https://scifaro.com/en/abs/composition-by-conversation-1709.02076</loc><lastmod>2017-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composition-by-conversation-1709.02076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composition-by-conversation-1709.02076"/></url>
<url><loc>https://scifaro.com/en/abs/normalized-features-for-improving-the-generalization-of-dnn-based-speech-enhancement-1709.02175</loc><lastmod>2018-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/normalized-features-for-improving-the-generalization-of-dnn-based-speech-enhancement-1709.02175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/normalized-features-for-improving-the-generalization-of-dnn-based-speech-enhancement-1709.02175"/></url>
<url><loc>https://scifaro.com/en/abs/what-were-you-expecting-using-expectancy-features-to-predict-expressive-performances-of-classical-piano-music-1709.03629</loc><lastmod>2017-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-were-you-expecting-using-expectancy-features-to-predict-expressive-performances-of-classical-piano-music-1709.03629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-were-you-expecting-using-expectancy-features-to-predict-expressive-performances-of-classical-piano-music-1709.03629"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-and-unsupervised-speech-enhancement-using-nonnegative-matrix-factorization-1709.05362</loc><lastmod>2017-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-and-unsupervised-speech-enhancement-using-nonnegative-matrix-factorization-1709.05362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-and-unsupervised-speech-enhancement-using-nonnegative-matrix-factorization-1709.05362"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-using-nonnegative-convolutive-transfer-function-and-spectro-temporal-modeling-1709.05557</loc><lastmod>2017-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-using-nonnegative-convolutive-transfer-function-and-spectro-temporal-modeling-1709.05557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-using-nonnegative-convolutive-transfer-function-and-spectro-temporal-modeling-1709.05557"/></url>
<url><loc>https://scifaro.com/en/abs/nonnegative-hmm-for-babble-noise-derived-from-speech-hmm-application-to-speech-enhancement-1709.05559</loc><lastmod>2017-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonnegative-hmm-for-babble-noise-derived-from-speech-hmm-application-to-speech-enhancement-1709.05559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonnegative-hmm-for-babble-noise-derived-from-speech-hmm-application-to-speech-enhancement-1709.05559"/></url>
<url><loc>https://scifaro.com/en/abs/linear-computer-music-through-sequences-over-galois-fields-1709.06663</loc><lastmod>2018-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linear-computer-music-through-sequences-over-galois-fields-1709.06663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linear-computer-music-through-sequences-over-galois-fields-1709.06663"/></url>
<url><loc>https://scifaro.com/en/abs/deep-recurrent-nmf-for-speech-separation-by-unfolding-iterative-thresholding-1709.07124</loc><lastmod>2017-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-recurrent-nmf-for-speech-separation-by-unfolding-iterative-thresholding-1709.07124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-recurrent-nmf-for-speech-separation-by-unfolding-iterative-thresholding-1709.07124"/></url>
<url><loc>https://scifaro.com/en/abs/large-vocabulary-automatic-chord-estimation-using-deep-neural-nets-design-framework-system-variations-and-limitations-1709.07153</loc><lastmod>2017-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-vocabulary-automatic-chord-estimation-using-deep-neural-nets-design-framework-system-variations-and-limitations-1709.07153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-vocabulary-automatic-chord-estimation-using-deep-neural-nets-design-framework-system-variations-and-limitations-1709.07153"/></url>
<url><loc>https://scifaro.com/en/abs/a-fundamental-frequency-estimation-method-for-tonal-sounds-inspired-on-bird-song-studies-1709.07541</loc><lastmod>2019-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fundamental-frequency-estimation-method-for-tonal-sounds-inspired-on-bird-song-studies-1709.07541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fundamental-frequency-estimation-method-for-tonal-sounds-inspired-on-bird-song-studies-1709.07541"/></url>
<url><loc>https://scifaro.com/en/abs/techniques-and-challenges-in-speech-synthesis-1709.07552</loc><lastmod>2017-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/techniques-and-challenges-in-speech-synthesis-1709.07552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/techniques-and-challenges-in-speech-synthesis-1709.07552"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-alternatives-to-convolutive-audio-models-for-source-separation-1709.07908</loc><lastmod>2017-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-alternatives-to-convolutive-audio-models-for-source-separation-1709.07908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-alternatives-to-convolutive-audio-models-for-source-separation-1709.07908"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-incorporating-generative-adversarial-networks-1709.08041</loc><lastmod>2017-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-incorporating-generative-adversarial-networks-1709.08041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-parametric-speech-synthesis-incorporating-generative-adversarial-networks-1709.08041"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-dsp-deep-learning-approach-to-real-time-full-band-speech-enhancement-1709.08243</loc><lastmod>2018-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-dsp-deep-learning-approach-to-real-time-full-band-speech-enhancement-1709.08243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-dsp-deep-learning-approach-to-real-time-full-band-speech-enhancement-1709.08243"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-interviewee-attitude-and-body-language-from-speech-descriptors-1709.08344</loc><lastmod>2017-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-interviewee-attitude-and-body-language-from-speech-descriptors-1709.08344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-interviewee-attitude-and-body-language-from-speech-descriptors-1709.08344"/></url>
<url><loc>https://scifaro.com/en/abs/research-on-several-key-technologies-in-practical-speech-emotion-recognition-1709.09364</loc><lastmod>2017-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-on-several-key-technologies-in-practical-speech-emotion-recognition-1709.09364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-on-several-key-technologies-in-practical-speech-emotion-recognition-1709.09364"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-complex-network-structure-of-musical-pieces-analysis-of-some-use-cases-from-different-music-genres-1709.09708</loc><lastmod>2017-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-complex-network-structure-of-musical-pieces-analysis-of-some-use-cases-from-different-music-genres-1709.09708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-complex-network-structure-of-musical-pieces-analysis-of-some-use-cases-from-different-music-genres-1709.09708"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-wind-noise-detection-and-suppression-with-neural-based-signal-reconstruction-for-mult-channel-low-power-devices-1710.00082</loc><lastmod>2017-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-wind-noise-detection-and-suppression-with-neural-based-signal-reconstruction-for-mult-channel-low-power-devices-1710.00082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-wind-noise-detection-and-suppression-with-neural-based-signal-reconstruction-for-mult-channel-low-power-devices-1710.00082"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-weakly-supervised-audio-classification-using-gated-convolutional-neural-network-1710.00343</loc><lastmod>2017-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-weakly-supervised-audio-classification-using-gated-convolutional-neural-network-1710.00343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-weakly-supervised-audio-classification-using-gated-convolutional-neural-network-1710.00343"/></url>
<url><loc>https://scifaro.com/en/abs/improving-compression-based-dissimilarity-measure-for-music-score-analysis-1710.01446</loc><lastmod>2017-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-compression-based-dissimilarity-measure-for-music-score-analysis-1710.01446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-compression-based-dissimilarity-measure-for-music-score-analysis-1710.01446"/></url>
<url><loc>https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-parametric-majorization-equalization-algorithm-1710.01589</loc><lastmod>2017-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-parametric-majorization-equalization-algorithm-1710.01589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-parametric-majorization-equalization-algorithm-1710.01589"/></url>
<url><loc>https://scifaro.com/en/abs/generating-nontrivial-melodies-for-music-as-a-service-1710.02280</loc><lastmod>2017-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-nontrivial-melodies-for-music-as-a-service-1710.02280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-nontrivial-melodies-for-music-as-a-service-1710.02280"/></url>
<url><loc>https://scifaro.com/en/abs/a-report-on-sound-event-detection-with-different-binaural-features-1710.02997</loc><lastmod>2017-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-report-on-sound-event-detection-with-different-binaural-features-1710.02997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-report-on-sound-event-detection-with-different-binaural-features-1710.02997"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-dataset-with-stacked-convolutional-and-recurrent-neural-network-1710.02998</loc><lastmod>2017-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-dataset-with-stacked-convolutional-and-recurrent-neural-network-1710.02998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-dataset-with-stacked-convolutional-and-recurrent-neural-network-1710.02998"/></url>
<url><loc>https://scifaro.com/en/abs/pyroomacoustics-a-python-package-for-audio-room-simulations-and-array-processing-algorithms-1710.04196</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pyroomacoustics-a-python-package-for-audio-room-simulations-and-array-processing-algorithms-1710.04196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pyroomacoustics-a-python-package-for-audio-room-simulations-and-array-processing-algorithms-1710.04196"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-of-music-using-artist-labels-1710.06648</loc><lastmod>2018-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-of-music-using-artist-labels-1710.06648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-of-music-using-artist-labels-1710.06648"/></url>
<url><loc>https://scifaro.com/en/abs/deep-voice-3-scaling-text-to-speech-with-convolutional-sequence-learning-1710.07654</loc><lastmod>2018-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-voice-3-scaling-text-to-speech-with-convolutional-sequence-learning-1710.07654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-voice-3-scaling-text-to-speech-with-convolutional-sequence-learning-1710.07654"/></url>
<url><loc>https://scifaro.com/en/abs/deep-triphone-embedding-improves-phoneme-recognition-1710.07868</loc><lastmod>2017-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-triphone-embedding-improves-phoneme-recognition-1710.07868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-triphone-embedding-improves-phoneme-recognition-1710.07868"/></url>
<url><loc>https://scifaro.com/en/abs/listening-to-the-world-improves-speech-command-recognition-1710.08377</loc><lastmod>2017-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-to-the-world-improves-speech-command-recognition-1710.08377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-to-the-world-improves-speech-command-recognition-1710.08377"/></url>
<url><loc>https://scifaro.com/en/abs/inferring-room-semantics-using-acoustic-monitoring-1710.08684</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inferring-room-semantics-using-acoustic-monitoring-1710.08684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inferring-room-semantics-using-acoustic-monitoring-1710.08684"/></url>
<url><loc>https://scifaro.com/en/abs/efficiently-trainable-text-to-speech-system-based-on-deep-convolutional-networks-with-guided-attention-1710.08969</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficiently-trainable-text-to-speech-system-based-on-deep-convolutional-networks-with-guided-attention-1710.08969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficiently-trainable-text-to-speech-system-based-on-deep-convolutional-networks-with-guided-attention-1710.08969"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-optimized-speech-coding-with-deep-neural-networks-1710.09064</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-optimized-speech-coding-with-deep-neural-networks-1710.09064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-optimized-speech-coding-with-deep-neural-networks-1710.09064"/></url>
<url><loc>https://scifaro.com/en/abs/relative-transfer-function-inverse-regression-from-low-dimensional-manifold-1710.09091</loc><lastmod>2017-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relative-transfer-function-inverse-regression-from-low-dimensional-manifold-1710.09091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relative-transfer-function-inverse-regression-from-low-dimensional-manifold-1710.09091"/></url>
<url><loc>https://scifaro.com/en/abs/separation-of-moving-sound-sources-using-multichannel-nmf-and-acoustic-tracking-1710.10005</loc><lastmod>2017-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separation-of-moving-sound-sources-using-multichannel-nmf-and-acoustic-tracking-1710.10005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separation-of-moving-sound-sources-using-multichannel-nmf-and-acoustic-tracking-1710.10005"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-estimation-for-multiple-sound-sources-using-convolutional-recurrent-neural-network-1710.10059</loc><lastmod>2018-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-for-multiple-sound-sources-using-convolutional-recurrent-neural-network-1710.10059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-for-multiple-sound-sources-using-convolutional-recurrent-neural-network-1710.10059"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-frame-alignments-for-gmm-based-digit-prompted-speaker-verification-1710.10436</loc><lastmod>2018-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-frame-alignments-for-gmm-based-digit-prompted-speaker-verification-1710.10436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-frame-alignments-for-gmm-based-digit-prompted-speaker-verification-1710.10436"/></url>
<url><loc>https://scifaro.com/en/abs/sample-level-cnn-architectures-for-music-auto-tagging-using-raw-waveforms-1710.10451</loc><lastmod>2018-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-level-cnn-architectures-for-music-auto-tagging-using-raw-waveforms-1710.10451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-level-cnn-architectures-for-music-auto-tagging-using-raw-waveforms-1710.10451"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-source-separation-1710.10779</loc><lastmod>2017-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-source-separation-1710.10779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-source-separation-1710.10779"/></url>
<url><loc>https://scifaro.com/en/abs/sound-source-localization-in-a-multipath-environment-using-convolutional-neural-networks-1710.10948</loc><lastmod>2017-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-source-localization-in-a-multipath-environment-using-convolutional-neural-networks-1710.10948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-source-localization-in-a-multipath-environment-using-convolutional-neural-networks-1710.10948"/></url>
<url><loc>https://scifaro.com/en/abs/content-based-representations-of-audio-using-siamese-neural-networks-1710.10974</loc><lastmod>2018-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-based-representations-of-audio-using-siamese-neural-networks-1710.10974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-based-representations-of-audio-using-siamese-neural-networks-1710.10974"/></url>
<url><loc>https://scifaro.com/en/abs/onsets-and-frames-dual-objective-piano-transcription-1710.11153</loc><lastmod>2018-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/onsets-and-frames-dual-objective-piano-transcription-1710.11153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/onsets-and-frames-dual-objective-piano-transcription-1710.11153"/></url>
<url><loc>https://scifaro.com/en/abs/audio-style-transfer-1710.11385</loc><lastmod>2019-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-style-transfer-1710.11385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-style-transfer-1710.11385"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-music-generation-with-sequence-generative-adversarial-networks-1710.11418</loc><lastmod>2018-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-music-generation-with-sequence-generative-adversarial-networks-1710.11418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-music-generation-with-sequence-generative-adversarial-networks-1710.11418"/></url>
<url><loc>https://scifaro.com/en/abs/svsgan-singing-voice-separation-via-generative-adversarial-network-1710.11428</loc><lastmod>2017-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svsgan-singing-voice-separation-via-generative-adversarial-network-1710.11428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svsgan-singing-voice-separation-via-generative-adversarial-network-1710.11428"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-speech-enhancement-based-on-probabilistic-integration-of-variational-autoencoder-and-non-negative-matrix-factorization-1710.11439</loc><lastmod>2019-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-speech-enhancement-based-on-probabilistic-integration-of-variational-autoencoder-and-non-negative-matrix-factorization-1710.11439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-speech-enhancement-based-on-probabilistic-integration-of-variational-autoencoder-and-non-negative-matrix-factorization-1710.11439"/></url>
<url><loc>https://scifaro.com/en/abs/multi-resolution-fully-convolutional-neural-networks-for-monaural-audio-source-separation-1710.11473</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-resolution-fully-convolutional-neural-networks-for-monaural-audio-source-separation-1710.11473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-resolution-fully-convolutional-neural-networks-for-monaural-audio-source-separation-1710.11473"/></url>
<url><loc>https://scifaro.com/en/abs/melody-generation-for-pop-music-via-word-representation-of-musical-properties-1710.11549</loc><lastmod>2017-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-generation-for-pop-music-via-word-representation-of-musical-properties-1710.11549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-generation-for-pop-music-via-word-representation-of-musical-properties-1710.11549"/></url>
<url><loc>https://scifaro.com/en/abs/user-environment-detection-with-acoustic-sensors-embedded-on-mobile-devices-for-the-recognition-of-activities-of-daily-living-1711.00124</loc><lastmod>2017-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/user-environment-detection-with-acoustic-sensors-embedded-on-mobile-devices-for-the-recognition-of-activities-of-daily-living-1711.00124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/user-environment-detection-with-acoustic-sensors-embedded-on-mobile-devices-for-the-recognition-of-activities-of-daily-living-1711.00124"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-model-complexity-for-dnn-based-large-scale-audio-classification-1711.00229</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-model-complexity-for-dnn-based-large-scale-audio-classification-1711.00229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-model-complexity-for-dnn-based-large-scale-audio-classification-1711.00229"/></url>
<url><loc>https://scifaro.com/en/abs/shift-invariant-kernel-additive-modelling-for-audio-source-separation-1711.00351</loc><lastmod>2018-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shift-invariant-kernel-additive-modelling-for-audio-source-separation-1711.00351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shift-invariant-kernel-additive-modelling-for-audio-source-separation-1711.00351"/></url>
<url><loc>https://scifaro.com/en/abs/full-info-training-for-deep-speaker-feature-learning-1711.00366</loc><lastmod>2018-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-info-training-for-deep-speaker-feature-learning-1711.00366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-info-training-for-deep-speaker-feature-learning-1711.00366"/></url>
<url><loc>https://scifaro.com/en/abs/tasnet-time-domain-audio-separation-network-for-real-time-single-channel-speech-separation-1711.00541</loc><lastmod>2018-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tasnet-time-domain-audio-separation-network-for-real-time-single-channel-speech-separation-1711.00541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tasnet-time-domain-audio-separation-network-for-real-time-single-channel-speech-separation-1711.00541"/></url>
<url><loc>https://scifaro.com/en/abs/framework-for-evaluation-of-sound-event-detection-in-web-videos-1711.00804</loc><lastmod>2018-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/framework-for-evaluation-of-sound-event-detection-in-web-videos-1711.00804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/framework-for-evaluation-of-sound-event-detection-in-web-videos-1711.00804"/></url>
<url><loc>https://scifaro.com/en/abs/does-phase-matter-for-monaural-source-separation-1711.00913</loc><lastmod>2017-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-phase-matter-for-monaural-source-separation-1711.00913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-phase-matter-for-monaural-source-separation-1711.00913"/></url>
<url><loc>https://scifaro.com/en/abs/audio-set-classification-with-attention-model-a-probabilistic-perspective-1711.00927</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-set-classification-with-attention-model-a-probabilistic-perspective-1711.00927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-set-classification-with-attention-model-a-probabilistic-perspective-1711.00927"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-transfer-from-weakly-labeled-audio-using-convolutional-neural-network-for-sound-events-and-scenes-1711.01369</loc><lastmod>2018-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-transfer-from-weakly-labeled-audio-using-convolutional-neural-network-for-sound-events-and-scenes-1711.01369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-transfer-from-weakly-labeled-audio-using-convolutional-neural-network-for-sound-events-and-scenes-1711.01369"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-singing-voice-separation-with-skip-filtering-connections-and-recurrent-inference-of-time-frequency-mask-1711.01437</loc><lastmod>2018-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-singing-voice-separation-with-skip-filtering-connections-and-recurrent-inference-of-time-frequency-mask-1711.01437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-singing-voice-separation-with-skip-filtering-connections-and-recurrent-inference-of-time-frequency-mask-1711.01437"/></url>
<url><loc>https://scifaro.com/en/abs/mandarin-tone-modeling-using-recurrent-neural-networks-1711.01946</loc><lastmod>2017-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mandarin-tone-modeling-using-recurrent-neural-networks-1711.01946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mandarin-tone-modeling-using-recurrent-neural-networks-1711.01946"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-learning-of-semantic-audio-representations-1711.02209</loc><lastmod>2017-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-learning-of-semantic-audio-representations-1711.02209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-learning-of-semantic-audio-representations-1711.02209"/></url>
<url><loc>https://scifaro.com/en/abs/non-uniform-time-scaling-of-carnatic-music-transients-1711.02318</loc><lastmod>2017-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-uniform-time-scaling-of-carnatic-music-transients-1711.02318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-uniform-time-scaling-of-carnatic-music-transients-1711.02318"/></url>
<url><loc>https://scifaro.com/en/abs/the-accompanion-v0-1-an-expressive-accompaniment-system-1711.02427</loc><lastmod>2017-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-accompanion-v0-1-an-expressive-accompaniment-system-1711.02427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-accompanion-v0-1-an-expressive-accompaniment-system-1711.02427"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-learning-for-music-audio-tagging-at-scale-1711.02520</loc><lastmod>2018-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-learning-for-music-audio-tagging-at-scale-1711.02520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-learning-for-music-audio-tagging-at-scale-1711.02520"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-separation-classification-model-for-sound-event-detection-of-weakly-labelled-data-1711.03037</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-separation-classification-model-for-sound-event-detection-of-weakly-labelled-data-1711.03037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-separation-classification-model-for-sound-event-detection-of-weakly-labelled-data-1711.03037"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-audio-source-separation-via-spectrum-energy-preserved-wasserstein-learning-1711.04121</loc><lastmod>2018-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-audio-source-separation-via-spectrum-energy-preserved-wasserstein-learning-1711.04121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-audio-source-separation-via-spectrum-energy-preserved-wasserstein-learning-1711.04121"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-score-alignment-of-piano-music-using-rnn-based-automatic-music-transcription-1711.04480</loc><lastmod>2017-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-score-alignment-of-piano-music-using-rnn-based-automatic-music-transcription-1711.04480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-score-alignment-of-piano-music-using-rnn-based-automatic-music-transcription-1711.04480"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-tuning-of-two-dimensional-keyboards-1711.05260</loc><lastmod>2017-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-tuning-of-two-dimensional-keyboards-1711.05260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-tuning-of-two-dimensional-keyboards-1711.05260"/></url>
<url><loc>https://scifaro.com/en/abs/human-and-machine-speaker-recognition-based-on-short-trivial-events-1711.05443</loc><lastmod>2018-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-and-machine-speaker-recognition-based-on-short-trivial-events-1711.05443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-and-machine-speaker-recognition-based-on-short-trivial-events-1711.05443"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-end-to-end-neural-speech-synthesizer-1711.05447</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-end-to-end-neural-speech-synthesizer-1711.05447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-end-to-end-neural-speech-synthesizer-1711.05447"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speech-enhancement-with-generative-adversarial-networks-for-robust-speech-recognition-1711.05747</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speech-enhancement-with-generative-adversarial-networks-for-robust-speech-recognition-1711.05747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speech-enhancement-with-generative-adversarial-networks-for-robust-speech-recognition-1711.05747"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-with-context-aware-recurrent-neural-networks-1711.06309</loc><lastmod>2017-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-with-context-aware-recurrent-neural-networks-1711.06309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-with-context-aware-recurrent-neural-networks-1711.06309"/></url>
<url><loc>https://scifaro.com/en/abs/a-double-joint-bayesian-approach-for-j-vector-based-text-dependent-speaker-verification-1711.06434</loc><lastmod>2017-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-double-joint-bayesian-approach-for-j-vector-based-text-dependent-speaker-verification-1711.06434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-double-joint-bayesian-approach-for-j-vector-based-text-dependent-speaker-verification-1711.06434"/></url>
<url><loc>https://scifaro.com/en/abs/separake-source-separation-with-a-little-help-from-echoes-1711.06805</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separake-source-separation-with-a-little-help-from-echoes-1711.06805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separake-source-separation-with-a-little-help-from-echoes-1711.06805"/></url>
<url><loc>https://scifaro.com/en/abs/hello-edge-keyword-spotting-on-microcontrollers-1711.07128</loc><lastmod>2018-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hello-edge-keyword-spotting-on-microcontrollers-1711.07128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hello-edge-keyword-spotting-on-microcontrollers-1711.07128"/></url>
<url><loc>https://scifaro.com/en/abs/jambot-music-theory-aware-chord-based-generation-of-polyphonic-music-with-lstms-1711.07682</loc><lastmod>2017-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jambot-music-theory-aware-chord-based-generation-of-polyphonic-music-with-lstms-1711.07682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jambot-music-theory-aware-chord-based-generation-of-polyphonic-music-with-lstms-1711.07682"/></url>
<url><loc>https://scifaro.com/en/abs/reflection-aware-sound-source-localization-1711.07791</loc><lastmod>2017-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reflection-aware-sound-source-localization-1711.07791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reflection-aware-sound-source-localization-1711.07791"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-speech-separation-and-enhancement-using-the-convolutive-transfer-function-1711.07911</loc><lastmod>2019-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-speech-separation-and-enhancement-using-the-convolutive-transfer-function-1711.07911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-speech-separation-and-enhancement-using-the-convolutive-transfer-function-1711.07911"/></url>
<url><loc>https://scifaro.com/en/abs/assessment-of-sound-spatialisation-algorithms-for-sonic-rendering-with-headsets-1711.09234</loc><lastmod>2017-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessment-of-sound-spatialisation-algorithms-for-sonic-rendering-with-headsets-1711.09234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessment-of-sound-spatialisation-algorithms-for-sonic-rendering-with-headsets-1711.09234"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-nontrivial-connectivity-for-automatic-speech-recognition-1711.10271</loc><lastmod>2017-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-nontrivial-connectivity-for-automatic-speech-recognition-1711.10271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-nontrivial-connectivity-for-automatic-speech-recognition-1711.10271"/></url>
<url><loc>https://scifaro.com/en/abs/now-playing-continuous-low-power-music-recognition-1711.10958</loc><lastmod>2017-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/now-playing-continuous-low-power-music-recognition-1711.10958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/now-playing-continuous-low-power-music-recognition-1711.10958"/></url>
<url><loc>https://scifaro.com/en/abs/stream-attention-for-far-field-multi-microphone-asr-1711.11141</loc><lastmod>2017-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stream-attention-for-far-field-multi-microphone-asr-1711.11141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stream-attention-for-far-field-multi-microphone-asr-1711.11141"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-neural-audio-style-transfer-1711.11160</loc><lastmod>2017-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-neural-audio-style-transfer-1711.11160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-neural-audio-style-transfer-1711.11160"/></url>
<url><loc>https://scifaro.com/en/abs/a-modeling-and-algorithmic-framework-for-non-social-co-sparse-audio-restoration-1711.11259</loc><lastmod>2017-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-modeling-and-algorithmic-framework-for-non-social-co-sparse-audio-restoration-1711.11259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-modeling-and-algorithmic-framework-for-non-social-co-sparse-audio-restoration-1711.11259"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-networks-for-multiple-speaker-detection-and-localization-1711.11565</loc><lastmod>2018-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-networks-for-multiple-speaker-detection-and-localization-1711.11565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-networks-for-multiple-speaker-detection-and-localization-1711.11565"/></url>
<url><loc>https://scifaro.com/en/abs/audio-cover-song-identification-using-convolutional-neural-network-1712.00166</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-cover-song-identification-using-convolutional-neural-network-1712.00166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-cover-song-identification-using-convolutional-neural-network-1712.00166"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-from-the-sound-of-the-human-breath-1712.00171</loc><lastmod>2017-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-from-the-sound-of-the-human-breath-1712.00171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-from-the-sound-of-the-human-breath-1712.00171"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-domain-knowledge-in-end-to-end-audio-processing-1712.00254</loc><lastmod>2017-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-domain-knowledge-in-end-to-end-audio-processing-1712.00254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-domain-knowledge-in-end-to-end-audio-processing-1712.00254"/></url>
<url><loc>https://scifaro.com/en/abs/raw-waveform-based-audio-classification-using-sample-level-cnn-architectures-1712.00866</loc><lastmod>2017-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raw-waveform-based-audio-classification-using-sample-level-cnn-architectures-1712.00866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raw-waveform-based-audio-classification-using-sample-level-cnn-architectures-1712.00866"/></url>
<url><loc>https://scifaro.com/en/abs/a-text-independent-speaker-verification-model-a-comparative-analysis-1712.00917</loc><lastmod>2017-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-text-independent-speaker-verification-model-a-comparative-analysis-1712.00917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-text-independent-speaker-verification-model-a-comparative-analysis-1712.00917"/></url>
<url><loc>https://scifaro.com/en/abs/chord-generation-from-symbolic-melody-using-blstm-networks-1712.01011</loc><lastmod>2017-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-generation-from-symbolic-melody-using-blstm-networks-1712.01011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-generation-from-symbolic-melody-using-blstm-networks-1712.01011"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-early-audio-event-detection-with-neural-networks-1712.02116</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-early-audio-event-detection-with-neural-networks-1712.02116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-early-audio-event-detection-with-neural-networks-1712.02116"/></url>
<url><loc>https://scifaro.com/en/abs/representations-of-sound-in-deep-learning-of-audio-features-from-music-1712.02898</loc><lastmod>2017-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representations-of-sound-in-deep-learning-of-audio-features-from-music-1712.02898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representations-of-sound-in-deep-learning-of-audio-features-from-music-1712.02898"/></url>
<url><loc>https://scifaro.com/en/abs/music-transcription-by-deep-learning-with-data-and-artificial-semantic-augmentation-1712.03228</loc><lastmod>2017-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-transcription-by-deep-learning-with-data-and-artificial-semantic-augmentation-1712.03228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-transcription-by-deep-learning-with-data-and-artificial-semantic-augmentation-1712.03228"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-implementation-of-the-room-simulator-for-training-deep-neural-network-acoustic-models-1712.03439</loc><lastmod>2019-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-implementation-of-the-room-simulator-for-training-deep-neural-network-acoustic-models-1712.03439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-implementation-of-the-room-simulator-for-training-deep-neural-network-acoustic-models-1712.03439"/></url>
<url><loc>https://scifaro.com/en/abs/the-organization-of-a-three-manual-keyboard-for-53-tone-tempered-and-other-tempered-systems-1712.03569</loc><lastmod>2017-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-organization-of-a-three-manual-keyboard-for-53-tone-tempered-and-other-tempered-systems-1712.03569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-organization-of-a-three-manual-keyboard-for-53-tone-tempered-and-other-tempered-systems-1712.03569"/></url>
<url><loc>https://scifaro.com/en/abs/prodorshok-i-a-bengali-isolated-speech-dataset-for-voice-based-assistive-technologies-a-comparative-analysis-of-the-effects-of-data-augmentation-on-hmm-gmm-and-dnn-classifiers-1712.03579</loc><lastmod>2017-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prodorshok-i-a-bengali-isolated-speech-dataset-for-voice-based-assistive-technologies-a-comparative-analysis-of-the-effects-of-data-augmentation-on-hmm-gmm-and-dnn-classifiers-1712.03579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prodorshok-i-a-bengali-isolated-speech-dataset-for-voice-based-assistive-technologies-a-comparative-analysis-of-the-effects-of-data-augmentation-on-hmm-gmm-and-dnn-classifiers-1712.03579"/></url>
<url><loc>https://scifaro.com/en/abs/a-cascade-architecture-for-keyword-spotting-on-mobile-devices-1712.03603</loc><lastmod>2017-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cascade-architecture-for-keyword-spotting-on-mobile-devices-1712.03603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cascade-architecture-for-keyword-spotting-on-mobile-devices-1712.03603"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-localization-using-convolutional-neural-network-trained-with-noise-1712.04276</loc><lastmod>2017-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-localization-using-convolutional-neural-network-trained-with-noise-1712.04276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-localization-using-convolutional-neural-network-trained-with-noise-1712.04276"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-by-deep-learning-challenges-and-directions-1712.04371</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-by-deep-learning-challenges-and-directions-1712.04371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-by-deep-learning-challenges-and-directions-1712.04371"/></url>
<url><loc>https://scifaro.com/en/abs/audeep-unsupervised-learning-of-representations-from-audio-with-deep-recurrent-neural-networks-1712.04382</loc><lastmod>2017-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audeep-unsupervised-learning-of-representations-from-audio-with-deep-recurrent-neural-networks-1712.04382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audeep-unsupervised-learning-of-representations-from-audio-with-deep-recurrent-neural-networks-1712.04382"/></url>
<url><loc>https://scifaro.com/en/abs/dlr-toward-a-deep-learned-rhythmic-representation-for-music-content-analysis-1712.05119</loc><lastmod>2017-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dlr-toward-a-deep-learned-rhythmic-representation-for-music-content-analysis-1712.05119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dlr-toward-a-deep-learned-rhythmic-representation-for-music-content-analysis-1712.05119"/></url>
<url><loc>https://scifaro.com/en/abs/a-hierarchical-recurrent-neural-network-for-symbolic-melody-generation-1712.05274</loc><lastmod>2018-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hierarchical-recurrent-neural-network-for-symbolic-melody-generation-1712.05274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hierarchical-recurrent-neural-network-for-symbolic-melody-generation-1712.05274"/></url>
<url><loc>https://scifaro.com/en/abs/language-and-noise-transfer-in-speech-enhancement-generative-adversarial-network-1712.06340</loc><lastmod>2017-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-and-noise-transfer-in-speech-enhancement-generative-adversarial-network-1712.06340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-and-noise-transfer-in-speech-enhancement-generative-adversarial-network-1712.06340"/></url>
<url><loc>https://scifaro.com/en/abs/joint-model-based-recognition-and-localization-of-overlapped-acoustic-events-using-a-set-of-distributed-small-microphone-arrays-1712.07065</loc><lastmod>2017-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-model-based-recognition-and-localization-of-overlapped-acoustic-events-using-a-set-of-distributed-small-microphone-arrays-1712.07065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-model-based-recognition-and-localization-of-overlapped-acoustic-events-using-a-set-of-distributed-small-microphone-arrays-1712.07065"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-deep-improviser-a-prototype-deep-learning-post-tonal-free-music-generator-1712.07799</loc><lastmod>2017-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-deep-improviser-a-prototype-deep-learning-post-tonal-free-music-generator-1712.07799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-deep-improviser-a-prototype-deep-learning-post-tonal-free-music-generator-1712.07799"/></url>
<url><loc>https://scifaro.com/en/abs/indoor-sound-source-localization-with-probabilistic-neural-network-1712.07814</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/indoor-sound-source-localization-with-probabilistic-neural-network-1712.07814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/indoor-sound-source-localization-with-probabilistic-neural-network-1712.07814"/></url>
<url><loc>https://scifaro.com/en/abs/rate-distributed-spatial-filtering-based-noise-reduction-in-wireless-acoustic-sensor-networks-1712.07941</loc><lastmod>2017-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rate-distributed-spatial-filtering-based-noise-reduction-in-wireless-acoustic-sensor-networks-1712.07941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rate-distributed-spatial-filtering-based-noise-reduction-in-wireless-acoustic-sensor-networks-1712.07941"/></url>
<url><loc>https://scifaro.com/en/abs/on-using-backpropagation-for-speech-texture-generation-and-voice-conversion-1712.08363</loc><lastmod>2018-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-using-backpropagation-for-speech-texture-generation-and-voice-conversion-1712.08363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-using-backpropagation-for-speech-texture-generation-and-voice-conversion-1712.08363"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-with-paralleling-recurrent-convolutional-neural-network-1712.08370</loc><lastmod>2017-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-with-paralleling-recurrent-convolutional-neural-network-1712.08370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-with-paralleling-recurrent-convolutional-neural-network-1712.08370"/></url>
<url><loc>https://scifaro.com/en/abs/variational-autoencoders-for-learning-latent-representations-of-speech-emotion-a-preliminary-study-1712.08708</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-autoencoders-for-learning-latent-representations-of-speech-emotion-a-preliminary-study-1712.08708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-autoencoders-for-learning-latent-representations-of-speech-emotion-a-preliminary-study-1712.08708"/></url>
<url><loc>https://scifaro.com/en/abs/eventness-object-detection-on-spectrograms-for-temporal-localization-of-audio-events-1712.09668</loc><lastmod>2018-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eventness-object-detection-on-spectrograms-for-temporal-localization-of-audio-events-1712.09668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eventness-object-detection-on-spectrograms-for-temporal-localization-of-audio-events-1712.09668"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-instance-deep-learning-for-weakly-supervised-small-footprint-audio-event-detection-1712.09673</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-instance-deep-learning-for-weakly-supervised-small-footprint-audio-event-detection-1712.09673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-instance-deep-learning-for-weakly-supervised-small-footprint-audio-event-detection-1712.09673"/></url>
<url><loc>https://scifaro.com/en/abs/a-light-weight-multimodal-framework-for-improved-environmental-audio-tagging-1712.09680</loc><lastmod>2018-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-light-weight-multimodal-framework-for-improved-environmental-audio-tagging-1712.09680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-light-weight-multimodal-framework-for-improved-environmental-audio-tagging-1712.09680"/></url>
<url><loc>https://scifaro.com/en/abs/deepj-style-specific-music-generation-1801.00887</loc><lastmod>2018-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepj-style-specific-music-generation-1801.00887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepj-style-specific-music-generation-1801.00887"/></url>
<url><loc>https://scifaro.com/en/abs/neural-style-transfer-for-audio-spectograms-1801.01589</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-style-transfer-for-audio-spectograms-1801.01589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-style-transfer-for-audio-spectograms-1801.01589"/></url>
<url><loc>https://scifaro.com/en/abs/tree-based-classification-of-tabla-strokes-1801.01712</loc><lastmod>2018-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tree-based-classification-of-tabla-strokes-1801.01712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tree-based-classification-of-tabla-strokes-1801.01712"/></url>
<url><loc>https://scifaro.com/en/abs/binning-based-algorithm-for-pitch-detection-in-hindustani-classical-music-1801.02155</loc><lastmod>2018-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binning-based-algorithm-for-pitch-detection-in-hindustani-classical-music-1801.02155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binning-based-algorithm-for-pitch-detection-in-hindustani-classical-music-1801.02155"/></url>
<url><loc>https://scifaro.com/en/abs/attacking-speaker-recognition-with-deep-generative-models-1801.02384</loc><lastmod>2018-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attacking-speaker-recognition-with-deep-generative-models-1801.02384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attacking-speaker-recognition-with-deep-generative-models-1801.02384"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2017-task-1-acoustic-scene-classification-using-shift-invariant-kernels-and-random-features-1801.02690</loc><lastmod>2018-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2017-task-1-acoustic-scene-classification-using-shift-invariant-kernels-and-random-features-1801.02690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2017-task-1-acoustic-scene-classification-using-shift-invariant-kernels-and-random-features-1801.02690"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-based-on-integrated-deep-and-ensemble-learning-algorithm-1801.04052</loc><lastmod>2018-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-based-on-integrated-deep-and-ensemble-learning-algorithm-1801.04052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-based-on-integrated-deep-and-ensemble-learning-algorithm-1801.04052"/></url>
<url><loc>https://scifaro.com/en/abs/separation-of-instrument-sounds-using-non-negative-matrix-factorization-with-spectral-envelope-constraints-1801.04081</loc><lastmod>2018-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separation-of-instrument-sounds-using-non-negative-matrix-factorization-with-spectral-envelope-constraints-1801.04081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separation-of-instrument-sounds-using-non-negative-matrix-factorization-with-spectral-envelope-constraints-1801.04081"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-classification-of-music-genre-using-masked-conditional-neural-networks-1801.05504</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-classification-of-music-genre-using-masked-conditional-neural-networks-1801.05504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-classification-of-music-genre-using-masked-conditional-neural-networks-1801.05504"/></url>
<url><loc>https://scifaro.com/en/abs/nels-never-ending-learner-of-sounds-1801.05544</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nels-never-ending-learner-of-sounds-1801.05544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nels-never-ending-learner-of-sounds-1801.05544"/></url>
<url><loc>https://scifaro.com/en/abs/gender-dependent-emotion-recognition-based-on-hmms-and-sphmms-1801.06657</loc><lastmod>2018-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-dependent-emotion-recognition-based-on-hmms-and-sphmms-1801.06657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-dependent-emotion-recognition-based-on-hmms-and-sphmms-1801.06657"/></url>
<url><loc>https://scifaro.com/en/abs/identifying-speakers-using-their-emotion-cues-1801.07054</loc><lastmod>2018-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identifying-speakers-using-their-emotion-cues-1801.07054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identifying-speakers-using-their-emotion-cues-1801.07054"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-modeling-and-generation-using-hierarchical-recurrent-neural-networks-for-speech-bandwidth-extension-1801.07910</loc><lastmod>2018-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-modeling-and-generation-using-hierarchical-recurrent-neural-networks-for-speech-bandwidth-extension-1801.07910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-modeling-and-generation-using-hierarchical-recurrent-neural-networks-for-speech-bandwidth-extension-1801.07910"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-sound-event-detection-using-3d-convolutional-neural-networks-for-learning-inter-channel-features-1801.09522</loc><lastmod>2018-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-sound-event-detection-using-3d-convolutional-neural-networks-for-learning-inter-channel-features-1801.09522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-sound-event-detection-using-3d-convolutional-neural-networks-for-learning-inter-channel-features-1801.09522"/></url>
<url><loc>https://scifaro.com/en/abs/on-psychoacoustically-weighted-cost-functions-towards-resource-efficient-deep-neural-networks-for-speech-denoising-1801.09774</loc><lastmod>2018-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-psychoacoustically-weighted-cost-functions-towards-resource-efficient-deep-neural-networks-for-speech-denoising-1801.09774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-psychoacoustically-weighted-cost-functions-towards-resource-efficient-deep-neural-networks-for-speech-denoising-1801.09774"/></url>
<url><loc>https://scifaro.com/en/abs/deep-predictive-models-in-interactive-music-1801.10492</loc><lastmod>2018-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-predictive-models-in-interactive-music-1801.10492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-predictive-models-in-interactive-music-1801.10492"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-and-graphemic-systems-for-multi-genre-broadcast-transcription-1802.00254</loc><lastmod>2018-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-and-graphemic-systems-for-multi-genre-broadcast-transcription-1802.00254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-and-graphemic-systems-for-multi-genre-broadcast-transcription-1802.00254"/></url>
<url><loc>https://scifaro.com/en/abs/mad-twinnet-masker-denoiser-architecture-with-twin-networks-for-monaural-sound-source-separation-1802.00300</loc><lastmod>2018-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mad-twinnet-masker-denoiser-architecture-with-twin-networks-for-monaural-sound-source-separation-1802.00300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mad-twinnet-masker-denoiser-architecture-with-twin-networks-for-monaural-sound-source-separation-1802.00300"/></url>
<url><loc>https://scifaro.com/en/abs/approximate-message-passing-for-underdetermined-audio-source-separation-1802.00380</loc><lastmod>2018-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approximate-message-passing-for-underdetermined-audio-source-separation-1802.00380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approximate-message-passing-for-underdetermined-audio-source-separation-1802.00380"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-speech-enhancement-using-deep-neural-networks-by-maximizing-a-short-time-objective-intelligibility-measure-1802.00604</loc><lastmod>2018-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-speech-enhancement-using-deep-neural-networks-by-maximizing-a-short-time-objective-intelligibility-measure-1802.00604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-speech-enhancement-using-deep-neural-networks-by-maximizing-a-short-time-objective-intelligibility-measure-1802.00604"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-approaches-for-mitigating-intergroup-variability-in-personality-recognition-1802.01405</loc><lastmod>2018-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-approaches-for-mitigating-intergroup-variability-in-personality-recognition-1802.01405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-approaches-for-mitigating-intergroup-variability-in-personality-recognition-1802.01405"/></url>
<url><loc>https://scifaro.com/en/abs/complex-isnmf-a-phase-aware-model-for-monaural-audio-source-separation-1802.03156</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-isnmf-a-phase-aware-model-for-monaural-audio-source-separation-1802.03156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-isnmf-a-phase-aware-model-for-monaural-audio-source-separation-1802.03156"/></url>
<url><loc>https://scifaro.com/en/abs/2-gram-based-phonetic-feature-generation-for-convolutional-neural-network-in-assessment-of-trademark-similarity-1802.03581</loc><lastmod>2018-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/2-gram-based-phonetic-feature-generation-for-convolutional-neural-network-in-assessment-of-trademark-similarity-1802.03581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/2-gram-based-phonetic-feature-generation-for-convolutional-neural-network-in-assessment-of-trademark-similarity-1802.03581"/></url>
<url><loc>https://scifaro.com/en/abs/linear-regression-for-speaker-verification-1802.04113</loc><lastmod>2018-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linear-regression-for-speaker-verification-1802.04113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linear-regression-for-speaker-verification-1802.04113"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-audio-synthesis-1802.04208</loc><lastmod>2019-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-audio-synthesis-1802.04208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-audio-synthesis-1802.04208"/></url>
<url><loc>https://scifaro.com/en/abs/bachprop-learning-to-compose-music-in-multiple-styles-1802.05162</loc><lastmod>2018-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bachprop-learning-to-compose-music-in-multiple-styles-1802.05162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bachprop-learning-to-compose-music-in-multiple-styles-1802.05162"/></url>
<url><loc>https://scifaro.com/en/abs/blind-source-separation-with-optimal-transport-non-negative-matrix-factorization-1802.05429</loc><lastmod>2018-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-source-separation-with-optimal-transport-non-negative-matrix-factorization-1802.05429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-source-separation-with-optimal-transport-non-negative-matrix-factorization-1802.05429"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-lstm-architecture-for-speech-emotion-recognition-with-data-augmentation-1802.05630</loc><lastmod>2018-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-lstm-architecture-for-speech-emotion-recognition-with-data-augmentation-1802.05630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-lstm-architecture-for-speech-emotion-recognition-with-data-augmentation-1802.05630"/></url>
<url><loc>https://scifaro.com/en/abs/voice-impersonation-using-generative-adversarial-networks-1802.06840</loc><lastmod>2018-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-impersonation-using-generative-adversarial-networks-1802.06840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-impersonation-using-generative-adversarial-networks-1802.06840"/></url>
<url><loc>https://scifaro.com/en/abs/neural-predictive-coding-using-convolutional-neural-networks-towards-unsupervised-learning-of-speaker-characteristics-1802.07860</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-predictive-coding-using-convolutional-neural-networks-towards-unsupervised-learning-of-speaker-characteristics-1802.07860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-predictive-coding-using-convolutional-neural-networks-towards-unsupervised-learning-of-speaker-characteristics-1802.07860"/></url>
<url><loc>https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-with-conditional-adversarial-autoencoders-1802.08008</loc><lastmod>2018-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-with-conditional-adversarial-autoencoders-1802.08008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-with-conditional-adversarial-autoencoders-1802.08008"/></url>
<url><loc>https://scifaro.com/en/abs/do-wavenets-dream-of-acoustic-waves-1802.08370</loc><lastmod>2018-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-wavenets-dream-of-acoustic-waves-1802.08370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-wavenets-dream-of-acoustic-waves-1802.08370"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-neural-audio-synthesis-1802.08435</loc><lastmod>2018-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-neural-audio-synthesis-1802.08435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-neural-audio-synthesis-1802.08435"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-network-achieves-human-level-accuracy-in-music-genre-classification-1802.09697</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-network-achieves-human-level-accuracy-in-music-genre-classification-1802.09697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-network-achieves-human-level-accuracy-in-music-genre-classification-1802.09697"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-transducer-positioning-in-active-noise-control-1802.10058</loc><lastmod>2018-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-transducer-positioning-in-active-noise-control-1802.10058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-transducer-positioning-in-active-noise-control-1802.10058"/></url>
<url><loc>https://scifaro.com/en/abs/interplay-between-musical-practices-and-tuning-in-the-marimba-de-chonta-music-1802.10162</loc><lastmod>2018-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interplay-between-musical-practices-and-tuning-in-the-marimba-de-chonta-music-1802.10162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interplay-between-musical-practices-and-tuning-in-the-marimba-de-chonta-music-1802.10162"/></url>
<url><loc>https://scifaro.com/en/abs/mode-domain-spatial-active-noise-control-using-sparse-signal-representation-1803.00187</loc><lastmod>2018-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mode-domain-spatial-active-noise-control-using-sparse-signal-representation-1803.00187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mode-domain-spatial-active-noise-control-using-sparse-signal-representation-1803.00187"/></url>
<url><loc>https://scifaro.com/en/abs/raw-multi-channel-audio-source-separation-using-multi-resolution-convolutional-auto-encoders-1803.00702</loc><lastmod>2018-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raw-multi-channel-audio-source-separation-using-multi-resolution-convolutional-auto-encoders-1803.00702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raw-multi-channel-audio-source-separation-using-multi-resolution-convolutional-auto-encoders-1803.00702"/></url>
<url><loc>https://scifaro.com/en/abs/speechpy-a-library-for-speech-processing-and-recognition-1803.01094</loc><lastmod>2018-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechpy-a-library-for-speech-processing-and-recognition-1803.01094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechpy-a-library-for-speech-processing-and-recognition-1803.01094"/></url>
<url><loc>https://scifaro.com/en/abs/audio-only-bird-species-automated-identification-method-with-limited-training-data-based-on-multi-channel-deep-convolutional-neural-networks-1803.01107</loc><lastmod>2018-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-only-bird-species-automated-identification-method-with-limited-training-data-based-on-multi-channel-deep-convolutional-neural-networks-1803.01107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-only-bird-species-automated-identification-method-with-limited-training-data-based-on-multi-channel-deep-convolutional-neural-networks-1803.01107"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-sound-source-localisation-with-steered-response-power-density-and-hierarchical-grid-refinement-1803.01339</loc><lastmod>2018-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-sound-source-localisation-with-steered-response-power-density-and-hierarchical-grid-refinement-1803.01339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-sound-source-localisation-with-steered-response-power-density-and-hierarchical-grid-refinement-1803.01339"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-networks-and-language-embeddings-for-end-to-end-dialect-recognition-1803.04567</loc><lastmod>2018-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-networks-and-language-embeddings-for-end-to-end-dialect-recognition-1803.04567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-networks-and-language-embeddings-for-end-to-end-dialect-recognition-1803.04567"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-using-spectral-analysis-and-sparse-representation-of-the-signals-1803.04652</loc><lastmod>2018-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-using-spectral-analysis-and-sparse-representation-of-the-signals-1803.04652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-using-spectral-analysis-and-sparse-representation-of-the-signals-1803.04652"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-effect-of-music-and-lyrics-on-spoken-word-recognition-1803.05058</loc><lastmod>2018-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-effect-of-music-and-lyrics-on-spoken-word-recognition-1803.05058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-effect-of-music-and-lyrics-on-spoken-word-recognition-1803.05058"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-recognize-musical-genre-from-audio-1803.05337</loc><lastmod>2018-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-recognize-musical-genre-from-audio-1803.05337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-recognize-musical-genre-from-audio-1803.05337"/></url>
<url><loc>https://scifaro.com/en/abs/music-style-transfer-a-position-paper-1803.06841</loc><lastmod>2018-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-style-transfer-a-position-paper-1803.06841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-style-transfer-a-position-paper-1803.06841"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-clustering-with-neural-networks-and-audio-processing-1803.08276</loc><lastmod>2018-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-clustering-with-neural-networks-and-audio-processing-1803.08276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-clustering-with-neural-networks-and-audio-processing-1803.08276"/></url>
<url><loc>https://scifaro.com/en/abs/generalization-challenges-for-neural-architectures-in-audio-source-separation-1803.08629</loc><lastmod>2018-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalization-challenges-for-neural-architectures-in-audio-source-separation-1803.08629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalization-challenges-for-neural-architectures-in-audio-source-separation-1803.08629"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-music-accompanist-1803.09033</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-music-accompanist-1803.09033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-music-accompanist-1803.09033"/></url>
<url><loc>https://scifaro.com/en/abs/mtgan-speaker-verification-through-multitasking-triplet-generative-adversarial-networks-1803.09059</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mtgan-speaker-verification-through-multitasking-triplet-generative-adversarial-networks-1803.09059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mtgan-speaker-verification-through-multitasking-triplet-generative-adversarial-networks-1803.09059"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-feature-mapping-with-mimic-loss-for-robust-speech-recognition-1803.09816</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-feature-mapping-with-mimic-loss-for-robust-speech-recognition-1803.09816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-feature-mapping-with-mimic-loss-for-robust-speech-recognition-1803.09816"/></url>
<url><loc>https://scifaro.com/en/abs/building-state-of-the-art-distant-speech-recognition-using-the-chime-4-challenge-with-a-setup-of-speech-enhancement-baseline-1803.10109</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-state-of-the-art-distant-speech-recognition-using-the-chime-4-challenge-with-a-setup-of-speech-enhancement-baseline-1803.10109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-state-of-the-art-distant-speech-recognition-using-the-chime-4-challenge-with-a-setup-of-speech-enhancement-baseline-1803.10109"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-generative-adversarial-networks-based-speech-dereverberation-for-robust-speech-recognition-1803.10132</loc><lastmod>2019-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-generative-adversarial-networks-based-speech-dereverberation-for-robust-speech-recognition-1803.10132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-generative-adversarial-networks-based-speech-dereverberation-for-robust-speech-recognition-1803.10132"/></url>
<url><loc>https://scifaro.com/en/abs/empirical-evaluation-of-speaker-adaptation-on-dnn-based-acoustic-model-1803.10146</loc><lastmod>2019-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empirical-evaluation-of-speaker-adaptation-on-dnn-based-acoustic-model-1803.10146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empirical-evaluation-of-speaker-adaptation-on-dnn-based-acoustic-model-1803.10146"/></url>
<url><loc>https://scifaro.com/en/abs/learning-environmental-sounds-with-multi-scale-convolutional-neural-network-1803.10219</loc><lastmod>2018-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-environmental-sounds-with-multi-scale-convolutional-neural-network-1803.10219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-environmental-sounds-with-multi-scale-convolutional-neural-network-1803.10219"/></url>
<url><loc>https://scifaro.com/en/abs/the-fifth-chime-speech-separation-and-recognition-challenge-dataset-task-and-baselines-1803.10609</loc><lastmod>2018-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-fifth-chime-speech-separation-and-recognition-challenge-dataset-task-and-baselines-1803.10609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-fifth-chime-speech-separation-and-recognition-challenge-dataset-task-and-baselines-1803.10609"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-end-to-end-models-for-small-footprint-keyword-spotting-1803.10916</loc><lastmod>2018-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-end-to-end-models-for-small-footprint-keyword-spotting-1803.10916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-end-to-end-models-for-small-footprint-keyword-spotting-1803.10916"/></url>
<url><loc>https://scifaro.com/en/abs/cracking-the-cocktail-party-problem-by-multi-beam-deep-attractor-network-1803.10924</loc><lastmod>2018-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cracking-the-cocktail-party-problem-by-multi-beam-deep-attractor-network-1803.10924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cracking-the-cocktail-party-problem-by-multi-beam-deep-attractor-network-1803.10924"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-end-to-end-audio-transforms-1804.00047</loc><lastmod>2018-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-end-to-end-audio-transforms-1804.00047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-end-to-end-audio-transforms-1804.00047"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-three-stage-framework-1804.00155</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-three-stage-framework-1804.00155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-three-stage-framework-1804.00155"/></url>
<url><loc>https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-each-of-neutral-and-shouted-talking-environments-1804.00981</loc><lastmod>2018-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-each-of-neutral-and-shouted-talking-environments-1804.00981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-each-of-neutral-and-shouted-talking-environments-1804.00981"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-the-max-and-noisy-or-pooling-functions-in-multiple-instance-learning-for-weakly-supervised-sequence-learning-tasks-1804.01146</loc><lastmod>2018-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-the-max-and-noisy-or-pooling-functions-in-multiple-instance-learning-for-weakly-supervised-sequence-learning-tasks-1804.01146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-the-max-and-noisy-or-pooling-functions-in-multiple-instance-learning-for-weakly-supervised-sequence-learning-tasks-1804.01146"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-using-machine-learning-techniques-1804.01149</loc><lastmod>2018-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-using-machine-learning-techniques-1804.01149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-using-machine-learning-techniques-1804.01149"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-vehicles-based-on-audio-signals-using-quadratic-discriminant-analysis-and-high-energy-feature-vectors-1804.01212</loc><lastmod>2018-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-vehicles-based-on-audio-signals-using-quadratic-discriminant-analysis-and-high-energy-feature-vectors-1804.01212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-vehicles-based-on-audio-signals-using-quadratic-discriminant-analysis-and-high-energy-feature-vectors-1804.01212"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-detecting-and-separating-singing-voice-a-multi-task-approach-1804.01650</loc><lastmod>2018-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-detecting-and-separating-singing-voice-a-multi-task-approach-1804.01650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-detecting-and-separating-singing-voice-a-multi-task-approach-1804.01650"/></url>
<url><loc>https://scifaro.com/en/abs/does-k-matter-k-nn-hubness-analysis-for-kernel-additive-modelling-vocal-separation-1804.02325</loc><lastmod>2018-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-k-matter-k-nn-hubness-analysis-for-kernel-additive-modelling-vocal-separation-1804.02325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-k-matter-k-nn-hubness-analysis-for-kernel-additive-modelling-vocal-separation-1804.02325"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-pitch-tracking-with-deep-layered-learning-1804.02918</loc><lastmod>2019-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-pitch-tracking-with-deep-layered-learning-1804.02918"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-pitch-tracking-with-deep-layered-learning-1804.02918"/></url>
<url><loc>https://scifaro.com/en/abs/realtime-active-sound-source-localization-for-unmanned-ground-robots-using-a-self-rotational-bi-microphone-array-1804.03372</loc><lastmod>2018-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realtime-active-sound-source-localization-for-unmanned-ground-robots-using-a-self-rotational-bi-microphone-array-1804.03372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realtime-active-sound-source-localization-for-unmanned-ground-robots-using-a-self-rotational-bi-microphone-array-1804.03372"/></url>
<url><loc>https://scifaro.com/en/abs/looking-to-listen-at-the-cocktail-party-a-speaker-independent-audio-visual-model-for-speech-separation-1804.03619</loc><lastmod>2018-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/looking-to-listen-at-the-cocktail-party-a-speaker-independent-audio-visual-model-for-speech-separation-1804.03619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/looking-to-listen-at-the-cocktail-party-a-speaker-independent-audio-visual-model-for-speech-separation-1804.03619"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-and-time-frequency-segmentation-from-weakly-labelled-data-1804.04715</loc><lastmod>2019-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-and-time-frequency-segmentation-from-weakly-labelled-data-1804.04715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-and-time-frequency-segmentation-from-weakly-labelled-data-1804.04715"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embedding-extraction-with-phonetic-information-1804.04862</loc><lastmod>2018-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embedding-extraction-with-phonetic-information-1804.04862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embedding-extraction-with-phonetic-information-1804.04862"/></url>
<url><loc>https://scifaro.com/en/abs/voices-obscured-in-complex-environmental-settings-voices-corpus-1804.05053</loc><lastmod>2018-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voices-obscured-in-complex-environmental-settings-voices-corpus-1804.05053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voices-obscured-in-complex-environmental-settings-voices-corpus-1804.05053"/></url>
<url><loc>https://scifaro.com/en/abs/multi-sound-source-localization-using-machine-learning-for-small-autonomous-unmanned-vehicles-with-a-self-rotating-bi-microphone-array-1804.05111</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-sound-source-localization-using-machine-learning-for-small-autonomous-unmanned-vehicles-with-a-self-rotating-bi-microphone-array-1804.05111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-sound-source-localization-using-machine-learning-for-small-autonomous-unmanned-vehicles-with-a-self-rotating-bi-microphone-array-1804.05111"/></url>
<url><loc>https://scifaro.com/en/abs/transcribing-lyrics-from-commercial-song-audio-the-first-step-towards-singing-content-processing-1804.05306</loc><lastmod>2018-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcribing-lyrics-from-commercial-song-audio-the-first-step-towards-singing-content-processing-1804.05306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcribing-lyrics-from-commercial-song-audio-the-first-step-towards-singing-content-processing-1804.05306"/></url>
<url><loc>https://scifaro.com/en/abs/computing-information-quantity-as-similarity-measure-for-music-classification-task-1804.05486</loc><lastmod>2018-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computing-information-quantity-as-similarity-measure-for-music-classification-task-1804.05486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computing-information-quantity-as-similarity-measure-for-music-classification-task-1804.05486"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-rain-and-cicada-chorus-filtering-of-bird-acoustic-data-1804.05502</loc><lastmod>2018-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-rain-and-cicada-chorus-filtering-of-bird-acoustic-data-1804.05502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-rain-and-cicada-chorus-filtering-of-bird-acoustic-data-1804.05502"/></url>
<url><loc>https://scifaro.com/en/abs/unspeech-unsupervised-speech-context-embeddings-1804.06775</loc><lastmod>2018-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unspeech-unsupervised-speech-context-embeddings-1804.06775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unspeech-unsupervised-speech-context-embeddings-1804.06775"/></url>
<url><loc>https://scifaro.com/en/abs/shaking-acoustic-spectral-sub-bands-can-better-regularize-learning-in-affective-computing-1804.06779</loc><lastmod>2018-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shaking-acoustic-spectral-sub-bands-can-better-regularize-learning-in-affective-computing-1804.06779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shaking-acoustic-spectral-sub-bands-can-better-regularize-learning-in-affective-computing-1804.06779"/></url>
<url><loc>https://scifaro.com/en/abs/deep-layered-learning-in-mir-1804.07297</loc><lastmod>2018-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-layered-learning-in-mir-1804.07297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-layered-learning-in-mir-1804.07297"/></url>
<url><loc>https://scifaro.com/en/abs/generating-music-using-an-lstm-network-1804.07300</loc><lastmod>2018-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-music-using-an-lstm-network-1804.07300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-music-using-an-lstm-network-1804.07300"/></url>
<url><loc>https://scifaro.com/en/abs/tempo-invariant-processing-of-rhythm-with-convolutional-neural-networks-1804.08167</loc><lastmod>2018-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tempo-invariant-processing-of-rhythm-with-convolutional-neural-networks-1804.08167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tempo-invariant-processing-of-rhythm-with-convolutional-neural-networks-1804.08167"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-lead-and-accompaniment-separation-in-music-1804.08300</loc><lastmod>2018-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-lead-and-accompaniment-separation-in-music-1804.08300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-lead-and-accompaniment-separation-in-music-1804.08300"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-evaluation-of-the-effectiveness-of-voice-disguise-by-age-modification-1804.08910</loc><lastmod>2018-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-evaluation-of-the-effectiveness-of-voice-disguise-by-age-modification-1804.08910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-evaluation-of-the-effectiveness-of-voice-disguise-by-age-modification-1804.08910"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-melody-extraction-using-patch-based-cnn-1804.09202</loc><lastmod>2018-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-melody-extraction-using-patch-based-cnn-1804.09202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-melody-extraction-using-patch-based-cnn-1804.09202"/></url>
<url><loc>https://scifaro.com/en/abs/a-closer-look-at-weak-label-learning-for-audio-events-1804.09288</loc><lastmod>2018-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-closer-look-at-weak-label-learning-for-audio-events-1804.09288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-closer-look-at-weak-label-learning-for-audio-events-1804.09288"/></url>
<url><loc>https://scifaro.com/en/abs/off-the-beaten-track-using-deep-learning-to-interpolate-between-music-genres-1804.09808</loc><lastmod>2018-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/off-the-beaten-track-using-deep-learning-to-interpolate-between-music-genres-1804.09808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/off-the-beaten-track-using-deep-learning-to-interpolate-between-music-genres-1804.09808"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-pooling-operators-for-weakly-labeled-sound-event-detection-1804.10070</loc><lastmod>2018-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-pooling-operators-for-weakly-labeled-sound-event-detection-1804.10070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-pooling-operators-for-weakly-labeled-sound-event-detection-1804.10070"/></url>
<url><loc>https://scifaro.com/en/abs/on-deep-speaker-embeddings-for-text-independent-speaker-recognition-1804.10080</loc><lastmod>2018-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-deep-speaker-embeddings-for-text-independent-speaker-recognition-1804.10080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-deep-speaker-embeddings-for-text-independent-speaker-recognition-1804.10080"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-raw-speech-using-convolutional-neural-networks-1804.10147</loc><lastmod>2019-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-raw-speech-using-convolutional-neural-networks-1804.10147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-raw-speech-using-convolutional-neural-networks-1804.10147"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-separation-with-unfolded-iterative-phase-reconstruction-1804.10204</loc><lastmod>2018-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-separation-with-unfolded-iterative-phase-reconstruction-1804.10204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-separation-with-unfolded-iterative-phase-reconstruction-1804.10204"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-denoising-with-vector-space-projections-1804.10669</loc><lastmod>2018-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-denoising-with-vector-space-projections-1804.10669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-denoising-with-vector-space-projections-1804.10669"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-documentation-of-icd-codes-with-far-field-speech-recognition-1804.11046</loc><lastmod>2018-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-documentation-of-icd-codes-with-far-field-speech-recognition-1804.11046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-documentation-of-icd-codes-with-far-field-speech-recognition-1804.11046"/></url>
<url><loc>https://scifaro.com/en/abs/waaw-csound-1804.11120</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waaw-csound-1804.11120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waaw-csound-1804.11120"/></url>
<url><loc>https://scifaro.com/en/abs/a-toolbox-for-rendering-virtual-acoustic-environments-in-the-context-of-audiology-1804.11300</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-toolbox-for-rendering-virtual-acoustic-environments-in-the-context-of-audiology-1804.11300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-toolbox-for-rendering-virtual-acoustic-environments-in-the-context-of-audiology-1804.11300"/></url>
<url><loc>https://scifaro.com/en/abs/randomly-weighted-cnns-for-music-audio-classification-1805.00237</loc><lastmod>2019-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/randomly-weighted-cnns-for-music-audio-classification-1805.00237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/randomly-weighted-cnns-for-music-audio-classification-1805.00237"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-speech-enhancement-1805.00579</loc><lastmod>2018-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-speech-enhancement-1805.00579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-networks-for-speech-enhancement-1805.00579"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-residual-cnn-with-l-gm-loss-speaker-verification-system-1805.00645</loc><lastmod>2018-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-residual-cnn-with-l-gm-loss-speaker-verification-system-1805.00645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-residual-cnn-with-l-gm-loss-speaker-verification-system-1805.00645"/></url>
<url><loc>https://scifaro.com/en/abs/sonyc-a-system-for-the-monitoring-analysis-and-mitigation-of-urban-noise-pollution-1805.00889</loc><lastmod>2018-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonyc-a-system-for-the-monitoring-analysis-and-mitigation-of-urban-noise-pollution-1805.00889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonyc-a-system-for-the-monitoring-analysis-and-mitigation-of-urban-noise-pollution-1805.00889"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-blind-source-separation-for-singing-voice-detection-a-comparative-study-1805.01201</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-blind-source-separation-for-singing-voice-detection-a-comparative-study-1805.01201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-blind-source-separation-for-singing-voice-detection-a-comparative-study-1805.01201"/></url>
<url><loc>https://scifaro.com/en/abs/noise-invariant-frame-selection-a-simple-method-to-address-the-background-noise-problem-for-text-independent-speaker-verification-1805.01259</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-invariant-frame-selection-a-simple-method-to-address-the-background-noise-problem-for-text-independent-speaker-verification-1805.01259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-invariant-frame-selection-a-simple-method-to-address-the-background-noise-problem-for-text-independent-speaker-verification-1805.01259"/></url>
<url><loc>https://scifaro.com/en/abs/generation-of-infra-sound-to-replicate-a-wind-turbine-1805.01297</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generation-of-infra-sound-to-replicate-a-wind-turbine-1805.01297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generation-of-infra-sound-to-replicate-a-wind-turbine-1805.01297"/></url>
<url><loc>https://scifaro.com/en/abs/deep-discriminant-analysis-for-i-vector-based-robust-speaker-recognition-1805.01344</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-discriminant-analysis-for-i-vector-based-robust-speaker-recognition-1805.01344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-discriminant-analysis-for-i-vector-based-robust-speaker-recognition-1805.01344"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-noise-robustness-of-acoustic-model-via-deep-adversarial-training-1805.01357</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-noise-robustness-of-acoustic-model-via-deep-adversarial-training-1805.01357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-noise-robustness-of-acoustic-model-via-deep-adversarial-training-1805.01357"/></url>
<url><loc>https://scifaro.com/en/abs/omg-emotion-challenge-excouple-team-1805.01576</loc><lastmod>2018-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omg-emotion-challenge-excouple-team-1805.01576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omg-emotion-challenge-excouple-team-1805.01576"/></url>
<url><loc>https://scifaro.com/en/abs/a-convex-approximation-of-the-relaxed-binaural-beamforming-optimization-problem-1805.01692</loc><lastmod>2019-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convex-approximation-of-the-relaxed-binaural-beamforming-optimization-problem-1805.01692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convex-approximation-of-the-relaxed-binaural-beamforming-optimization-problem-1805.01692"/></url>
<url><loc>https://scifaro.com/en/abs/mmdenselstm-an-efficient-combination-of-convolutional-and-recurrent-neural-networks-for-audio-source-separation-1805.02410</loc><lastmod>2018-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmdenselstm-an-efficient-combination-of-convolutional-and-recurrent-neural-networks-for-audio-source-separation-1805.02410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmdenselstm-an-efficient-combination-of-convolutional-and-recurrent-neural-networks-for-audio-source-separation-1805.02410"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-approach-to-smooth-pitch-correction-for-singing-voice-in-pop-music-1805.02603</loc><lastmod>2018-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-approach-to-smooth-pitch-correction-for-singing-voice-in-pop-music-1805.02603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-approach-to-smooth-pitch-correction-for-singing-voice-in-pop-music-1805.02603"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-polyphonic-sound-event-detection-using-convolutional-recurrent-neural-networks-with-learned-time-frequency-representation-input-1805.03647</loc><lastmod>2018-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-polyphonic-sound-event-detection-using-convolutional-recurrent-neural-networks-with-learned-time-frequency-representation-input-1805.03647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-polyphonic-sound-event-detection-using-convolutional-recurrent-neural-networks-with-learned-time-frequency-representation-input-1805.03647"/></url>
<url><loc>https://scifaro.com/en/abs/extended-pipeline-for-content-based-feature-engineering-in-music-genre-recognition-1805.05324</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extended-pipeline-for-content-based-feature-engineering-in-music-genre-recognition-1805.05324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extended-pipeline-for-content-based-feature-engineering-in-music-genre-recognition-1805.05324"/></url>
<url><loc>https://scifaro.com/en/abs/a-purely-end-to-end-system-for-multi-speaker-speech-recognition-1805.05826</loc><lastmod>2018-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-purely-end-to-end-system-for-multi-speaker-speech-recognition-1805.05826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-purely-end-to-end-system-for-multi-speaker-speech-recognition-1805.05826"/></url>
<url><loc>https://scifaro.com/en/abs/psd-estimation-and-source-separation-in-a-noisy-reverberant-environment-using-a-spherical-microphone-array-1805.06234</loc><lastmod>2018-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psd-estimation-and-source-separation-in-a-noisy-reverberant-environment-using-a-spherical-microphone-array-1805.06234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psd-estimation-and-source-separation-in-a-noisy-reverberant-environment-using-a-spherical-microphone-array-1805.06234"/></url>
<url><loc>https://scifaro.com/en/abs/fastfca-a-joint-diagonalization-based-fast-algorithm-for-audio-source-separation-using-a-full-rank-spatial-covariance-model-1805.06572</loc><lastmod>2018-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastfca-a-joint-diagonalization-based-fast-algorithm-for-audio-source-separation-using-a-full-rank-spatial-covariance-model-1805.06572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastfca-a-joint-diagonalization-based-fast-algorithm-for-audio-source-separation-using-a-full-rank-spatial-covariance-model-1805.06572"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-architectures-for-text-independent-speaker-verification-using-deep-neural-networks-1805.07628</loc><lastmod>2018-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-architectures-for-text-independent-speaker-verification-using-deep-neural-networks-1805.07628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-architectures-for-text-independent-speaker-verification-using-deep-neural-networks-1805.07628"/></url>
<url><loc>https://scifaro.com/en/abs/a-universal-music-translation-network-1805.07848</loc><lastmod>2018-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-universal-music-translation-network-1805.07848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-universal-music-translation-network-1805.07848"/></url>
<url><loc>https://scifaro.com/en/abs/generative-timbre-spaces-regularizing-variational-auto-encoders-with-perceptual-metrics-1805.08501</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-timbre-spaces-regularizing-variational-auto-encoders-with-perceptual-metrics-1805.08501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-timbre-spaces-regularizing-variational-auto-encoders-with-perceptual-metrics-1805.08501"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-using-stacked-hourglass-networks-1805.08559</loc><lastmod>2018-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-using-stacked-hourglass-networks-1805.08559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-using-stacked-hourglass-networks-1805.08559"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-clustering-using-dominant-sets-1805.08641</loc><lastmod>2018-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-clustering-using-dominant-sets-1805.08641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-clustering-using-dominant-sets-1805.08641"/></url>
<url><loc>https://scifaro.com/en/abs/fastfca-as-joint-diagonalization-based-acceleration-of-full-rank-spatial-covariance-analysis-for-separating-any-number-of-sources-1805.09498</loc><lastmod>2018-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastfca-as-joint-diagonalization-based-acceleration-of-full-rank-spatial-covariance-analysis-for-separating-any-number-of-sources-1805.09498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastfca-as-joint-diagonalization-based-acceleration-of-full-rank-spatial-covariance-analysis-for-separating-any-number-of-sources-1805.09498"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-classification-based-on-multi-temporal-resolution-convolutional-neural-network-combining-with-multi-level-features-1805.09752</loc><lastmod>2018-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-classification-based-on-multi-temporal-resolution-convolutional-neural-network-combining-with-multi-level-features-1805.09752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-classification-based-on-multi-temporal-resolution-convolutional-neural-network-combining-with-multi-level-features-1805.09752"/></url>
<url><loc>https://scifaro.com/en/abs/real-valued-parametric-conditioning-of-an-rnn-for-interactive-sound-synthesis-1805.10808</loc><lastmod>2018-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-valued-parametric-conditioning-of-an-rnn-for-interactive-sound-synthesis-1805.10808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-valued-parametric-conditioning-of-an-rnn-for-interactive-sound-synthesis-1805.10808"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-label-noise-sensitivity-of-convolutional-neural-networks-for-fine-grained-audio-signal-labelling-1805.10880</loc><lastmod>2018-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-label-noise-sensitivity-of-convolutional-neural-networks-for-fine-grained-audio-signal-labelling-1805.10880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-label-noise-sensitivity-of-convolutional-neural-networks-for-fine-grained-audio-signal-labelling-1805.10880"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-transcribe-by-ear-1805.11526</loc><lastmod>2018-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-transcribe-by-ear-1805.11526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-transcribe-by-ear-1805.11526"/></url>
<url><loc>https://scifaro.com/en/abs/receiver-placement-for-speech-enhancement-using-sound-propagation-optimization-1805.11533</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/receiver-placement-for-speech-enhancement-using-sound-propagation-optimization-1805.11533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/receiver-placement-for-speech-enhancement-using-sound-propagation-optimization-1805.11533"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-analysis-using-partially-connected-microphones-based-on-graph-cepstrum-1805.11782</loc><lastmod>2018-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-analysis-using-partially-connected-microphones-based-on-graph-cepstrum-1805.11782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-analysis-using-partially-connected-microphones-based-on-graph-cepstrum-1805.11782"/></url>
<url><loc>https://scifaro.com/en/abs/voice-imitating-text-to-speech-neural-networks-1806.00927</loc><lastmod>2018-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-imitating-text-to-speech-neural-networks-1806.00927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-imitating-text-to-speech-neural-networks-1806.00927"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-hmm-based-speaker-adaptive-emotion-recognition-using-proposed-epoch-and-mfcc-features-1806.00984</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-hmm-based-speaker-adaptive-emotion-recognition-using-proposed-epoch-and-mfcc-features-1806.00984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-hmm-based-speaker-adaptive-emotion-recognition-using-proposed-epoch-and-mfcc-features-1806.00984"/></url>
<url><loc>https://scifaro.com/en/abs/machines-hear-better-when-they-have-ears-1806.01145</loc><lastmod>2018-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machines-hear-better-when-they-have-ears-1806.01145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machines-hear-better-when-they-have-ears-1806.01145"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-singing-voice-detection-a-quantitative-review-and-the-future-outlook-1806.01180</loc><lastmod>2018-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-singing-voice-detection-a-quantitative-review-and-the-future-outlook-1806.01180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-singing-voice-detection-a-quantitative-review-and-the-future-outlook-1806.01180"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-fully-convolutional-network-for-speech-emotion-recognition-1806.01506</loc><lastmod>2019-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-fully-convolutional-network-for-speech-emotion-recognition-1806.01506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-fully-convolutional-network-for-speech-emotion-recognition-1806.01506"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-phoneme-segmentation-by-hierarchically-inferring-syllable-and-phoneme-onset-positions-1806.01665</loc><lastmod>2018-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-phoneme-segmentation-by-hierarchically-inferring-syllable-and-phoneme-onset-positions-1806.01665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-phoneme-segmentation-by-hierarchically-inferring-syllable-and-phoneme-onset-positions-1806.01665"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-vc-non-parallel-many-to-many-voice-conversion-with-star-generative-adversarial-networks-1806.02169</loc><lastmod>2018-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-vc-non-parallel-many-to-many-voice-conversion-with-star-generative-adversarial-networks-1806.02169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-vc-non-parallel-many-to-many-voice-conversion-with-star-generative-adversarial-networks-1806.02169"/></url>
<url><loc>https://scifaro.com/en/abs/wave-u-net-a-multi-scale-neural-network-for-end-to-end-audio-source-separation-1806.03185</loc><lastmod>2018-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wave-u-net-a-multi-scale-neural-network-for-end-to-end-audio-source-separation-1806.03185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wave-u-net-a-multi-scale-neural-network-for-end-to-end-audio-source-separation-1806.03185"/></url>
<url><loc>https://scifaro.com/en/abs/the-nes-music-database-a-multi-instrumental-dataset-with-expressive-performance-attributes-1806.04278</loc><lastmod>2018-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nes-music-database-a-multi-instrumental-dataset-with-expressive-performance-attributes-1806.04278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nes-music-database-a-multi-instrumental-dataset-with-expressive-performance-attributes-1806.04278"/></url>
<url><loc>https://scifaro.com/en/abs/capsule-routing-for-sound-event-detection-1806.04699</loc><lastmod>2018-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capsule-routing-for-sound-event-detection-1806.04699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capsule-routing-for-sound-event-detection-1806.04699"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-approach-to-mid-level-perceptual-musical-feature-modeling-1806.04903</loc><lastmod>2018-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-approach-to-mid-level-perceptual-musical-feature-modeling-1806.04903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-approach-to-mid-level-perceptual-musical-feature-modeling-1806.04903"/></url>
<url><loc>https://scifaro.com/en/abs/voxceleb2-deep-speaker-recognition-1806.05622</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxceleb2-deep-speaker-recognition-1806.05622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxceleb2-deep-speaker-recognition-1806.05622"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-source-enhancement-maximizing-source-to-distortion-ratio-via-automatic-differentiation-1806.05791</loc><lastmod>2018-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-source-enhancement-maximizing-source-to-distortion-ratio-via-automatic-differentiation-1806.05791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-source-enhancement-maximizing-source-to-distortion-ratio-via-automatic-differentiation-1806.05791"/></url>
<url><loc>https://scifaro.com/en/abs/extending-recurrent-neural-aligner-for-streaming-end-to-end-speech-recognition-in-mandarin-1806.06342</loc><lastmod>2019-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-recurrent-neural-aligner-for-streaming-end-to-end-speech-recognition-in-mandarin-1806.06342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-recurrent-neural-aligner-for-streaming-end-to-end-speech-recognition-in-mandarin-1806.06342"/></url>
<url><loc>https://scifaro.com/en/abs/cover-song-synthesis-by-analogy-1806.06347</loc><lastmod>2018-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cover-song-synthesis-by-analogy-1806.06347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cover-song-synthesis-by-analogy-1806.06347"/></url>
<url><loc>https://scifaro.com/en/abs/towards-multi-instrument-drum-transcription-1806.06676</loc><lastmod>2018-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-multi-instrument-drum-transcription-1806.06676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-multi-instrument-drum-transcription-1806.06676"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-efficient-deep-learning-model-for-musical-onset-detection-1806.06773</loc><lastmod>2018-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-efficient-deep-learning-model-for-musical-onset-detection-1806.06773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-efficient-deep-learning-model-for-musical-onset-detection-1806.06773"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-variants-of-velvet-noise-and-their-application-to-speech-processing-and-synthesis-with-appendices-1806.06812</loc><lastmod>2018-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-variants-of-velvet-noise-and-their-application-to-speech-processing-and-synthesis-with-appendices-1806.06812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-variants-of-velvet-noise-and-their-application-to-speech-processing-and-synthesis-with-appendices-1806.06812"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-fusion-of-deep-and-shallow-learning-for-acoustic-scene-classification-1806.07506</loc><lastmod>2018-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-fusion-of-deep-and-shallow-learning-for-acoustic-scene-classification-1806.07506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-fusion-of-deep-and-shallow-learning-for-acoustic-scene-classification-1806.07506"/></url>
<url><loc>https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-end-to-end-automatic-speech-recognition-1806.07789</loc><lastmod>2018-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-end-to-end-automatic-speech-recognition-1806.07789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-end-to-end-automatic-speech-recognition-1806.07789"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-diverse-high-quality-audio-textures-1806.08002</loc><lastmod>2018-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-diverse-high-quality-audio-textures-1806.08002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-diverse-high-quality-audio-textures-1806.08002"/></url>
<url><loc>https://scifaro.com/en/abs/learning-transposition-invariant-interval-features-from-symbolic-music-and-audio-1806.08236</loc><lastmod>2019-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-transposition-invariant-interval-features-from-symbolic-music-and-audio-1806.08236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-transposition-invariant-interval-features-from-symbolic-music-and-audio-1806.08236"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relationship-between-short-time-objective-intelligibility-and-short-time-spectral-amplitude-mean-square-error-for-speech-enhancement-1806.08404</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relationship-between-short-time-objective-intelligibility-and-short-time-spectral-amplitude-mean-square-error-for-speech-enhancement-1806.08404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relationship-between-short-time-objective-intelligibility-and-short-time-spectral-amplitude-mean-square-error-for-speech-enhancement-1806.08404"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-training-of-speaker-identification-models-1806.08621</loc><lastmod>2018-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-training-of-speaker-identification-models-1806.08621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-training-of-speaker-identification-models-1806.08621"/></url>
<url><loc>https://scifaro.com/en/abs/a-predictive-model-for-music-based-on-learned-interval-representations-1806.08686</loc><lastmod>2018-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-predictive-model-for-music-based-on-learned-interval-representations-1806.08686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-predictive-model-for-music-based-on-learned-interval-representations-1806.08686"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-language-models-of-tonal-harmony-1806.08724</loc><lastmod>2018-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-language-models-of-tonal-harmony-1806.08724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-language-models-of-tonal-harmony-1806.08724"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-gammatone-frequency-cepstral-coefficients-with-neural-networks-for-emotion-recognition-from-speech-1806.09010</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-gammatone-frequency-cepstral-coefficients-with-neural-networks-for-emotion-recognition-from-speech-1806.09010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-gammatone-frequency-cepstral-coefficients-with-neural-networks-for-emotion-recognition-from-speech-1806.09010"/></url>
<url><loc>https://scifaro.com/en/abs/robust-feature-clustering-for-unsupervised-speech-activity-detection-1806.09301</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-feature-clustering-for-unsupervised-speech-activity-detection-1806.09301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-feature-clustering-for-unsupervised-speech-activity-detection-1806.09301"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-dereverberation-via-generative-adversarial-training-1806.09325</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-dereverberation-via-generative-adversarial-training-1806.09325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-dereverberation-via-generative-adversarial-training-1806.09325"/></url>
<url><loc>https://scifaro.com/en/abs/frame-level-instrument-recognition-by-timbre-and-pitch-1806.09587</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-level-instrument-recognition-by-timbre-and-pitch-1806.09587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-level-instrument-recognition-by-timbre-and-pitch-1806.09587"/></url>
<url><loc>https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-using-a-conditional-adversarial-autoencoder-1806.09617</loc><lastmod>2018-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-using-a-conditional-adversarial-autoencoder-1806.09617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sounderfeit-cloning-a-physical-model-using-a-conditional-adversarial-autoencoder-1806.09617"/></url>
<url><loc>https://scifaro.com/en/abs/conditioning-deep-generative-raw-audio-models-for-structured-automatic-music-1806.09905</loc><lastmod>2018-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioning-deep-generative-raw-audio-models-for-structured-automatic-music-1806.09905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioning-deep-generative-raw-audio-models-for-structured-automatic-music-1806.09905"/></url>
<url><loc>https://scifaro.com/en/abs/text-independent-speaker-verification-based-on-deep-neural-networks-and-segmental-dynamic-time-warping-1806.09932</loc><lastmod>2018-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-independent-speaker-verification-based-on-deep-neural-networks-and-segmental-dynamic-time-warping-1806.09932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-independent-speaker-verification-based-on-deep-neural-networks-and-segmental-dynamic-time-warping-1806.09932"/></url>
<url><loc>https://scifaro.com/en/abs/the-challenge-of-realistic-music-generation-modelling-raw-audio-at-scale-1806.10474</loc><lastmod>2018-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-challenge-of-realistic-music-generation-modelling-raw-audio-at-scale-1806.10474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-challenge-of-realistic-music-generation-modelling-raw-audio-at-scale-1806.10474"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-majorness-as-a-perceptual-property-in-music-from-listener-ratings-1806.10570</loc><lastmod>2018-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-majorness-as-a-perceptual-property-in-music-from-listener-ratings-1806.10570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-majorness-as-a-perceptual-property-in-music-from-listener-ratings-1806.10570"/></url>
<url><loc>https://scifaro.com/en/abs/generationmania-learning-to-semantically-choreograph-1806.11170</loc><lastmod>2019-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generationmania-learning-to-semantically-choreograph-1806.11170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generationmania-learning-to-semantically-choreograph-1806.11170"/></url>
<url><loc>https://scifaro.com/en/abs/exploratory-analysis-of-a-large-flamenco-corpus-using-an-ensemble-of-convolutional-neural-networks-as-a-structural-annotation-backend-1807.00069</loc><lastmod>2018-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploratory-analysis-of-a-large-flamenco-corpus-using-an-ensemble-of-convolutional-neural-networks-as-a-structural-annotation-backend-1807.00069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploratory-analysis-of-a-large-flamenco-corpus-using-an-ensemble-of-convolutional-neural-networks-as-a-structural-annotation-backend-1807.00069"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-of-overlapping-sources-using-convolutional-recurrent-neural-networks-1807.00129</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-of-overlapping-sources-using-convolutional-recurrent-neural-networks-1807.00129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-of-overlapping-sources-using-convolutional-recurrent-neural-networks-1807.00129"/></url>
<url><loc>https://scifaro.com/en/abs/harnessing-ai-for-speech-reconstruction-using-multi-view-silent-video-feed-1807.00619</loc><lastmod>2018-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harnessing-ai-for-speech-reconstruction-using-multi-view-silent-video-feed-1807.00619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harnessing-ai-for-speech-reconstruction-using-multi-view-silent-video-feed-1807.00619"/></url>
<url><loc>https://scifaro.com/en/abs/an-energy-based-generative-sequence-model-for-testing-sensory-theories-of-western-harmony-1807.00790</loc><lastmod>2018-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-energy-based-generative-sequence-model-for-testing-sensory-theories-of-western-harmony-1807.00790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-energy-based-generative-sequence-model-for-testing-sensory-theories-of-western-harmony-1807.00790"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-end-to-end-techniques-for-low-resource-speech-recognition-1807.00868</loc><lastmod>2018-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-end-to-end-techniques-for-low-resource-speech-recognition-1807.00868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-end-to-end-techniques-for-low-resource-speech-recognition-1807.00868"/></url>
<url><loc>https://scifaro.com/en/abs/a-computational-study-of-the-role-of-tonal-tension-in-expressive-piano-performance-1807.01080</loc><lastmod>2018-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computational-study-of-the-role-of-tonal-tension-in-expressive-piano-performance-1807.01080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computational-study-of-the-role-of-tonal-tension-in-expressive-piano-performance-1807.01080"/></url>
<url><loc>https://scifaro.com/en/abs/denoising-auto-encoder-with-recurrent-skip-connections-and-residual-regression-for-music-source-separation-1807.01898</loc><lastmod>2018-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoising-auto-encoder-with-recurrent-skip-connections-and-residual-regression-for-music-source-separation-1807.01898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoising-auto-encoder-with-recurrent-skip-connections-and-residual-regression-for-music-source-separation-1807.01898"/></url>
<url><loc>https://scifaro.com/en/abs/singing-style-transfer-using-cycle-consistent-boundary-equilibrium-generative-adversarial-networks-1807.02254</loc><lastmod>2018-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-style-transfer-using-cycle-consistent-boundary-equilibrium-generative-adversarial-networks-1807.02254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-style-transfer-using-cycle-consistent-boundary-equilibrium-generative-adversarial-networks-1807.02254"/></url>
<url><loc>https://scifaro.com/en/abs/improving-dnn-based-music-source-separation-using-phase-features-1807.02710</loc><lastmod>2018-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-dnn-based-music-source-separation-using-phase-features-1807.02710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-dnn-based-music-source-separation-using-phase-features-1807.02710"/></url>
<url><loc>https://scifaro.com/en/abs/densely-connected-cnns-for-bird-audio-detection-1807.02776</loc><lastmod>2018-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/densely-connected-cnns-for-bird-audio-detection-1807.02776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/densely-connected-cnns-for-bird-audio-detection-1807.02776"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-singing-processing-achievements-challenges-and-impact-on-singers-and-listeners-1807.03046</loc><lastmod>2018-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-singing-processing-achievements-challenges-and-impact-on-singers-and-listeners-1807.03046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-singing-processing-achievements-challenges-and-impact-on-singers-and-listeners-1807.03046"/></url>
<url><loc>https://scifaro.com/en/abs/audiomnist-exploring-explainable-artificial-intelligence-for-audio-analysis-on-a-simple-benchmark-1807.03418</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiomnist-exploring-explainable-artificial-intelligence-for-audio-analysis-on-a-simple-benchmark-1807.03418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiomnist-exploring-explainable-artificial-intelligence-for-audio-analysis-on-a-simple-benchmark-1807.03418"/></url>
<url><loc>https://scifaro.com/en/abs/phase-reconstruction-from-amplitude-spectrograms-based-on-von-mises-distribution-deep-neural-network-1807.03474</loc><lastmod>2018-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-reconstruction-from-amplitude-spectrograms-based-on-von-mises-distribution-deep-neural-network-1807.03474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-reconstruction-from-amplitude-spectrograms-based-on-von-mises-distribution-deep-neural-network-1807.03474"/></url>
<url><loc>https://scifaro.com/en/abs/foreign-english-accent-adjustment-by-learning-phonetic-patterns-1807.03625</loc><lastmod>2018-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foreign-english-accent-adjustment-by-learning-phonetic-patterns-1807.03625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foreign-english-accent-adjustment-by-learning-phonetic-patterns-1807.03625"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-from-speech-based-on-relevant-feature-and-majority-voting-1807.03909</loc><lastmod>2018-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-based-on-relevant-feature-and-majority-voting-1807.03909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-based-on-relevant-feature-and-majority-voting-1807.03909"/></url>
<url><loc>https://scifaro.com/en/abs/a-punishment-voting-algorithm-based-on-super-categories-construction-for-acoustic-scene-classification-1807.04073</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-punishment-voting-algorithm-based-on-super-categories-construction-for-acoustic-scene-classification-1807.04073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-punishment-voting-algorithm-based-on-super-categories-construction-for-acoustic-scene-classification-1807.04073"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-acoustic-features-for-acoustic-scene-classification-and-score-fusion-of-multi-classification-systems-applied-to-dcase-2016-challenge-1807.04970</loc><lastmod>2018-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-acoustic-features-for-acoustic-scene-classification-and-score-fusion-of-multi-classification-systems-applied-to-dcase-2016-challenge-1807.04970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-acoustic-features-for-acoustic-scene-classification-and-score-fusion-of-multi-classification-systems-applied-to-dcase-2016-challenge-1807.04970"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-acoustic-detection-of-birds-through-deep-learning-the-first-bird-audio-detection-challenge-1807.05812</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-acoustic-detection-of-birds-through-deep-learning-the-first-bird-audio-detection-challenge-1807.05812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-acoustic-detection-of-birds-through-deep-learning-the-first-bird-audio-detection-challenge-1807.05812"/></url>
<url><loc>https://scifaro.com/en/abs/subjective-and-objective-experiments-on-the-influence-of-speaker-s-gender-on-the-unvoiced-segments-1807.05813</loc><lastmod>2018-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subjective-and-objective-experiments-on-the-influence-of-speaker-s-gender-on-the-unvoiced-segments-1807.05813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subjective-and-objective-experiments-on-the-influence-of-speaker-s-gender-on-the-unvoiced-segments-1807.05813"/></url>
<url><loc>https://scifaro.com/en/abs/psychological-constraints-on-string-based-methods-for-pattern-discovery-in-polyphonic-corpora-1807.06700</loc><lastmod>2018-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psychological-constraints-on-string-based-methods-for-pattern-discovery-in-polyphonic-corpora-1807.06700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psychological-constraints-on-string-based-methods-for-pattern-discovery-in-polyphonic-corpora-1807.06700"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-based-speech-separation-optimizing-an-objective-estimator-of-intelligibility-for-low-latency-applications-1807.06899</loc><lastmod>2018-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-based-speech-separation-optimizing-an-objective-estimator-of-intelligibility-for-low-latency-applications-1807.06899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-based-speech-separation-optimizing-an-objective-estimator-of-intelligibility-for-low-latency-applications-1807.06899"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-weakly-supervised-learning-for-low-resource-audio-event-detection-using-deep-learning-1807.06972</loc><lastmod>2018-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-weakly-supervised-learning-for-low-resource-audio-event-detection-using-deep-learning-1807.06972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-weakly-supervised-learning-for-low-resource-audio-event-detection-using-deep-learning-1807.06972"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-score-alignment-using-transposition-invariant-features-1807.07278</loc><lastmod>2018-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-score-alignment-using-transposition-invariant-features-1807.07278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-score-alignment-using-transposition-invariant-features-1807.07278"/></url>
<url><loc>https://scifaro.com/en/abs/noise-adaptive-speech-enhancement-using-domain-adversarial-training-1807.07501</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-adaptive-speech-enhancement-using-domain-adversarial-training-1807.07501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-adaptive-speech-enhancement-using-domain-adversarial-training-1807.07501"/></url>
<url><loc>https://scifaro.com/en/abs/a-fully-convolutional-neural-network-approach-to-end-to-end-speech-enhancement-1807.07959</loc><lastmod>2018-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fully-convolutional-neural-network-approach-to-end-to-end-speech-enhancement-1807.07959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fully-convolutional-neural-network-approach-to-end-to-end-speech-enhancement-1807.07959"/></url>
<url><loc>https://scifaro.com/en/abs/auto-adaptive-resonance-equalization-using-dilated-residual-networks-1807.08636</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-adaptive-resonance-equalization-using-dilated-residual-networks-1807.08636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-adaptive-resonance-equalization-using-dilated-residual-networks-1807.08636"/></url>
<url><loc>https://scifaro.com/en/abs/joint-time-frequency-scattering-1807.08869</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-time-frequency-scattering-1807.08869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-time-frequency-scattering-1807.08869"/></url>
<url><loc>https://scifaro.com/en/abs/deep-extractor-network-for-target-speaker-recovery-from-single-channel-speech-mixtures-1807.08974</loc><lastmod>2018-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-extractor-network-for-target-speaker-recovery-from-single-channel-speech-mixtures-1807.08974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-extractor-network-for-target-speaker-recovery-from-single-channel-speech-mixtures-1807.08974"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-of-deep-audio-feature-and-i-vector-for-artist-recognition-1807.09208</loc><lastmod>2018-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-of-deep-audio-feature-and-i-vector-for-artist-recognition-1807.09208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-of-deep-audio-feature-and-i-vector-for-artist-recognition-1807.09208"/></url>
<url><loc>https://scifaro.com/en/abs/general-purpose-tagging-of-freesound-audio-with-audioset-labels-task-description-dataset-and-baseline-1807.09902</loc><lastmod>2018-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/general-purpose-tagging-of-freesound-audio-with-audioset-labels-task-description-dataset-and-baseline-1807.09902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/general-purpose-tagging-of-freesound-audio-with-audioset-labels-task-description-dataset-and-baseline-1807.09902"/></url>
<url><loc>https://scifaro.com/en/abs/modulation-domain-kalman-filtering-for-monaural-blind-speech-denoising-and-dereverberation-1807.10236</loc><lastmod>2018-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulation-domain-kalman-filtering-for-monaural-blind-speech-denoising-and-dereverberation-1807.10236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulation-domain-kalman-filtering-for-monaural-blind-speech-denoising-and-dereverberation-1807.10236"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-weakly-labeled-semi-supervised-sound-event-detection-in-domestic-environments-1807.10501</loc><lastmod>2018-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-weakly-labeled-semi-supervised-sound-event-detection-in-domestic-environments-1807.10501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-weakly-labeled-semi-supervised-sound-event-detection-in-domestic-environments-1807.10501"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automatic-speech-identification-from-vocal-tract-shape-dynamics-in-real-time-mri-1807.11089</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automatic-speech-identification-from-vocal-tract-shape-dynamics-in-real-time-mri-1807.11089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automatic-speech-identification-from-vocal-tract-shape-dynamics-in-real-time-mri-1807.11089"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-acoustic-localization-using-deep-learning-from-audio-signal-to-source-position-coordinates-1807.11094</loc><lastmod>2019-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-acoustic-localization-using-deep-learning-from-audio-signal-to-source-position-coordinates-1807.11094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-acoustic-localization-using-deep-learning-from-audio-signal-to-source-position-coordinates-1807.11094"/></url>
<url><loc>https://scifaro.com/en/abs/audio-segmentation-based-on-melodic-style-with-hand-crafted-features-and-with-convolutional-neural-networks-1807.11138</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-segmentation-based-on-melodic-style-with-hand-crafted-features-and-with-convolutional-neural-networks-1807.11138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-segmentation-based-on-melodic-style-with-hand-crafted-features-and-with-convolutional-neural-networks-1807.11138"/></url>
<url><loc>https://scifaro.com/en/abs/lead-sheet-generation-and-arrangement-by-conditional-generative-adversarial-network-1807.11161</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lead-sheet-generation-and-arrangement-by-conditional-generative-adversarial-network-1807.11161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lead-sheet-generation-and-arrangement-by-conditional-generative-adversarial-network-1807.11161"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-percussive-source-separation-with-deep-neural-networks-and-phase-recovery-1807.11298</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-percussive-source-separation-with-deep-neural-networks-and-phase-recovery-1807.11298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-percussive-source-separation-with-deep-neural-networks-and-phase-recovery-1807.11298"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-driven-speaker-independent-audio-visual-mask-estimation-for-speech-separation-1808.00060</loc><lastmod>2018-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-driven-speaker-independent-audio-visual-mask-estimation-for-speech-separation-1808.00060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-driven-speaker-independent-audio-visual-mask-estimation-for-speech-separation-1808.00060"/></url>
<url><loc>https://scifaro.com/en/abs/ava-speech-a-densely-labeled-dataset-of-speech-activity-in-movies-1808.00606</loc><lastmod>2018-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ava-speech-a-densely-labeled-dataset-of-speech-activity-in-movies-1808.00606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ava-speech-a-densely-labeled-dataset-of-speech-activity-in-movies-1808.00606"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2018-challenge-surrey-cross-task-convolutional-neural-network-baseline-1808.00773</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2018-challenge-surrey-cross-task-convolutional-neural-network-baseline-1808.00773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2018-challenge-surrey-cross-task-convolutional-neural-network-baseline-1808.00773"/></url>
<url><loc>https://scifaro.com/en/abs/histogram-transform-based-speaker-identification-1808.00959</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/histogram-transform-based-speaker-identification-1808.00959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/histogram-transform-based-speaker-identification-1808.00959"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-speech-model-description-with-vmf-mixture-model-1808.00960</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-speech-model-description-with-vmf-mixture-model-1808.00960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-speech-model-description-with-vmf-mixture-model-1808.00960"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-raga-notes-with-a-markov-chain-of-order-1-2-1808.01603</loc><lastmod>2018-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-raga-notes-with-a-markov-chain-of-order-1-2-1808.01603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-raga-notes-with-a-markov-chain-of-order-1-2-1808.01603"/></url>
<url><loc>https://scifaro.com/en/abs/audio-tagging-with-connectionist-temporal-classification-model-using-sequential-labelled-data-1808.01935</loc><lastmod>2018-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-tagging-with-connectionist-temporal-classification-model-using-sequential-labelled-data-1808.01935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-tagging-with-connectionist-temporal-classification-model-using-sequential-labelled-data-1808.01935"/></url>
<url><loc>https://scifaro.com/en/abs/towards-learning-fine-grained-disentangled-representations-from-speech-1808.02939</loc><lastmod>2018-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-learning-fine-grained-disentangled-representations-from-speech-1808.02939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-learning-fine-grained-disentangled-representations-from-speech-1808.02939"/></url>
<url><loc>https://scifaro.com/en/abs/rhythm-flexible-voice-conversion-without-parallel-data-using-cycle-gan-over-phoneme-posteriorgram-sequences-1808.03113</loc><lastmod>2018-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythm-flexible-voice-conversion-without-parallel-data-using-cycle-gan-over-phoneme-posteriorgram-sequences-1808.03113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythm-flexible-voice-conversion-without-parallel-data-using-cycle-gan-over-phoneme-posteriorgram-sequences-1808.03113"/></url>
<url><loc>https://scifaro.com/en/abs/this-time-with-feeling-learning-expressive-musical-performance-1808.03715</loc><lastmod>2018-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/this-time-with-feeling-learning-expressive-musical-performance-1808.03715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/this-time-with-feeling-learning-expressive-musical-performance-1808.03715"/></url>
<url><loc>https://scifaro.com/en/abs/sample-mixed-based-data-augmentation-for-domestic-audio-tagging-1808.03883</loc><lastmod>2018-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-mixed-based-data-augmentation-for-domestic-audio-tagging-1808.03883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-mixed-based-data-augmentation-for-domestic-audio-tagging-1808.03883"/></url>
<url><loc>https://scifaro.com/en/abs/murmur-detection-using-parallel-recurrent-convolutional-neural-networks-1808.04411</loc><lastmod>2018-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/murmur-detection-using-parallel-recurrent-convolutional-neural-networks-1808.04411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/murmur-detection-using-parallel-recurrent-convolutional-neural-networks-1808.04411"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-using-disentangled-and-interpretable-representations-for-one-shot-cross-lingual-voice-conversion-1808.05294</loc><lastmod>2018-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-using-disentangled-and-interpretable-representations-for-one-shot-cross-lingual-voice-conversion-1808.05294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-using-disentangled-and-interpretable-representations-for-one-shot-cross-lingual-voice-conversion-1808.05294"/></url>
<url><loc>https://scifaro.com/en/abs/improved-chord-recognition-by-combining-duration-and-harmonic-language-models-1808.05335</loc><lastmod>2018-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-chord-recognition-by-combining-duration-and-harmonic-language-models-1808.05335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-chord-recognition-by-combining-duration-and-harmonic-language-models-1808.05335"/></url>
<url><loc>https://scifaro.com/en/abs/genre-agnostic-key-classification-with-convolutional-neural-networks-1808.05340</loc><lastmod>2018-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/genre-agnostic-key-classification-with-convolutional-neural-networks-1808.05340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/genre-agnostic-key-classification-with-convolutional-neural-networks-1808.05340"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-chord-recognition-with-higher-order-harmonic-language-modelling-1808.05341</loc><lastmod>2018-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-chord-recognition-with-higher-order-harmonic-language-modelling-1808.05341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-chord-recognition-with-higher-order-harmonic-language-modelling-1808.05341"/></url>
<url><loc>https://scifaro.com/en/abs/quality-net-an-end-to-end-non-intrusive-speech-quality-assessment-model-based-on-blstm-1808.05344</loc><lastmod>2018-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quality-net-an-end-to-end-non-intrusive-speech-quality-assessment-model-based-on-blstm-1808.05344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quality-net-an-end-to-end-non-intrusive-speech-quality-assessment-model-based-on-blstm-1808.05344"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speaker-clustering-using-mixtures-of-von-mises-fisher-distributions-for-naturalistic-audio-streams-1808.06045</loc><lastmod>2018-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speaker-clustering-using-mixtures-of-von-mises-fisher-distributions-for-naturalistic-audio-streams-1808.06045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speaker-clustering-using-mixtures-of-von-mises-fisher-distributions-for-naturalistic-audio-streams-1808.06045"/></url>
<url><loc>https://scifaro.com/en/abs/deep-residual-network-for-sound-source-localization-in-the-time-domain-1808.06429</loc><lastmod>2018-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-residual-network-for-sound-source-localization-in-the-time-domain-1808.06429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-residual-network-for-sound-source-localization-in-the-time-domain-1808.06429"/></url>
<url><loc>https://scifaro.com/en/abs/r-crnn-region-based-convolutional-recurrent-neural-network-for-audio-event-detection-1808.06627</loc><lastmod>2018-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/r-crnn-region-based-convolutional-recurrent-neural-network-for-audio-event-detection-1808.06627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/r-crnn-region-based-convolutional-recurrent-neural-network-for-audio-event-detection-1808.06627"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-model-for-detection-of-rare-sound-events-1808.06676</loc><lastmod>2018-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-model-for-detection-of-rare-sound-events-1808.06676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-model-for-detection-of-rare-sound-events-1808.06676"/></url>
<url><loc>https://scifaro.com/en/abs/fast-spectrogram-inversion-using-multi-head-convolutional-neural-networks-1808.06719</loc><lastmod>2018-12-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-spectrogram-inversion-using-multi-head-convolutional-neural-networks-1808.06719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-spectrogram-inversion-using-multi-head-convolutional-neural-networks-1808.06719"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-a-unified-attention-based-pooling-framework-for-speaker-verification-1808.07120</loc><lastmod>2018-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-a-unified-attention-based-pooling-framework-for-speaker-verification-1808.07120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-a-unified-attention-based-pooling-framework-for-speaker-verification-1808.07120"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-with-conditional-samplernn-1808.08311</loc><lastmod>2018-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-with-conditional-samplernn-1808.08311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-with-conditional-samplernn-1808.08311"/></url>
<url><loc>https://scifaro.com/en/abs/multiobjective-optimization-training-of-plda-for-speaker-verification-1808.08344</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiobjective-optimization-training-of-plda-for-speaker-verification-1808.08344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiobjective-optimization-training-of-plda-for-speaker-verification-1808.08344"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-network-with-mixup-for-environmental-sound-classification-1808.08405</loc><lastmod>2018-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-with-mixup-for-environmental-sound-classification-1808.08405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-with-mixup-for-environmental-sound-classification-1808.08405"/></url>
<url><loc>https://scifaro.com/en/abs/augmenting-bottleneck-features-of-deep-neural-network-employing-motor-state-for-speech-recognition-at-humanoid-robots-1808.08702</loc><lastmod>2018-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmenting-bottleneck-features-of-deep-neural-network-employing-motor-state-for-speech-recognition-at-humanoid-robots-1808.08702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmenting-bottleneck-features-of-deep-neural-network-employing-motor-state-for-speech-recognition-at-humanoid-robots-1808.08702"/></url>
<url><loc>https://scifaro.com/en/abs/extended-playing-techniques-the-next-milestone-in-musical-instrument-recognition-1808.09730</loc><lastmod>2018-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extended-playing-techniques-the-next-milestone-in-musical-instrument-recognition-1808.09730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extended-playing-techniques-the-next-milestone-in-musical-instrument-recognition-1808.09730"/></url>
<url><loc>https://scifaro.com/en/abs/mes-p-an-emotional-tonal-speech-dataset-in-mandarin-chinese-with-distal-and-proximal-labels-1808.10095</loc><lastmod>2018-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mes-p-an-emotional-tonal-speech-dataset-in-mandarin-chinese-with-distal-and-proximal-labels-1808.10095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mes-p-an-emotional-tonal-speech-dataset-in-mandarin-chinese-with-distal-and-proximal-labels-1808.10095"/></url>
<url><loc>https://scifaro.com/en/abs/contribution-of-glottal-waveform-in-speech-emotion-a-comparative-pairwise-investigation-1808.10144</loc><lastmod>2018-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contribution-of-glottal-waveform-in-speech-emotion-a-comparative-pairwise-investigation-1808.10144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contribution-of-glottal-waveform-in-speech-emotion-a-comparative-pairwise-investigation-1808.10144"/></url>
<url><loc>https://scifaro.com/en/abs/single-microphone-speech-enhancement-and-separation-using-deep-learning-1808.10620</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-microphone-speech-enhancement-and-separation-using-deep-learning-1808.10620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-microphone-speech-enhancement-and-separation-using-deep-learning-1808.10620"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-linguistic-acoustic-decoder-1808.10678</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-linguistic-acoustic-decoder-1808.10678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-linguistic-acoustic-decoder-1808.10678"/></url>
<url><loc>https://scifaro.com/en/abs/whispered-to-voiced-alaryngeal-speech-conversion-with-generative-adversarial-networks-1808.10687</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whispered-to-voiced-alaryngeal-speech-conversion-with-generative-adversarial-networks-1808.10687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whispered-to-voiced-alaryngeal-speech-conversion-with-generative-adversarial-networks-1808.10687"/></url>
<url><loc>https://scifaro.com/en/abs/a-machine-learning-driven-iot-solution-for-noise-classification-in-smart-cities-1809.00238</loc><lastmod>2018-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-machine-learning-driven-iot-solution-for-noise-classification-in-smart-cities-1809.00238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-machine-learning-driven-iot-solution-for-noise-classification-in-smart-cities-1809.00238"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-for-fundamental-frequency-estimation-in-music-1809.00381</loc><lastmod>2018-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-for-fundamental-frequency-estimation-in-music-1809.00381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-for-fundamental-frequency-estimation-in-music-1809.00381"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-of-human-perception-in-audio-event-classification-1809.00502</loc><lastmod>2018-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-of-human-perception-in-audio-event-classification-1809.00502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-of-human-perception-in-audio-event-classification-1809.00502"/></url>
<url><loc>https://scifaro.com/en/abs/deep-room-recognition-using-inaudible-echos-1809.00531</loc><lastmod>2018-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-room-recognition-using-inaudible-echos-1809.00531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-room-recognition-using-inaudible-echos-1809.00531"/></url>
<url><loc>https://scifaro.com/en/abs/automated-bird-sound-recognition-in-realistic-settings-1809.01133</loc><lastmod>2018-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-bird-sound-recognition-in-realistic-settings-1809.01133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-bird-sound-recognition-in-realistic-settings-1809.01133"/></url>
<url><loc>https://scifaro.com/en/abs/three-stage-speaker-verification-architecture-in-emotional-talking-environments-1809.01721</loc><lastmod>2018-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/three-stage-speaker-verification-architecture-in-emotional-talking-environments-1809.01721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/three-stage-speaker-verification-architecture-in-emotional-talking-environments-1809.01721"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-generation-of-spatial-audio-for-360-video-1809.02587</loc><lastmod>2018-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-generation-of-spatial-audio-for-360-video-1809.02587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-generation-of-spatial-audio-for-360-video-1809.02587"/></url>
<url><loc>https://scifaro.com/en/abs/transforming-acoustic-characteristics-to-deceive-playback-spoofing-countermeasures-of-speaker-verification-systems-1809.04274</loc><lastmod>2018-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transforming-acoustic-characteristics-to-deceive-playback-spoofing-countermeasures-of-speaker-verification-systems-1809.04274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transforming-acoustic-characteristics-to-deceive-playback-spoofing-countermeasures-of-speaker-verification-systems-1809.04274"/></url>
<url><loc>https://scifaro.com/en/abs/isolated-and-ensemble-audio-preprocessing-methods-for-detecting-adversarial-examples-against-automatic-speech-recognition-1809.04397</loc><lastmod>2018-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/isolated-and-ensemble-audio-preprocessing-methods-for-detecting-adversarial-examples-against-automatic-speech-recognition-1809.04397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/isolated-and-ensemble-audio-preprocessing-methods-for-detecting-adversarial-examples-against-automatic-speech-recognition-1809.04397"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-stage-algorithm-for-acoustic-physical-model-parameters-estimation-1809.05483</loc><lastmod>2019-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-stage-algorithm-for-acoustic-physical-model-parameters-estimation-1809.05483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-stage-algorithm-for-acoustic-physical-model-parameters-estimation-1809.05483"/></url>
<url><loc>https://scifaro.com/en/abs/attention-as-a-perspective-for-learning-tempo-invariant-audio-queries-1809.05689</loc><lastmod>2018-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-as-a-perspective-for-learning-tempo-invariant-audio-queries-1809.05689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-as-a-perspective-for-learning-tempo-invariant-audio-queries-1809.05689"/></url>
<url><loc>https://scifaro.com/en/abs/cocktails-but-no-party-multipath-enabled-private-audio-1809.05862</loc><lastmod>2018-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cocktails-but-no-party-multipath-enabled-private-audio-1809.05862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cocktails-but-no-party-multipath-enabled-private-audio-1809.05862"/></url>
<url><loc>https://scifaro.com/en/abs/deepdrum-an-adaptive-conditional-neural-network-1809.06127</loc><lastmod>2019-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepdrum-an-adaptive-conditional-neural-network-1809.06127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepdrum-an-adaptive-conditional-neural-network-1809.06127"/></url>
<url><loc>https://scifaro.com/en/abs/switching-divergences-for-spectral-learning-in-blind-speech-dereverberation-1809.07375</loc><lastmod>2018-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/switching-divergences-for-spectral-learning-in-blind-speech-dereverberation-1809.07375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/switching-divergences-for-spectral-learning-in-blind-speech-dereverberation-1809.07375"/></url>
<url><loc>https://scifaro.com/en/abs/conv-tasnet-surpassing-ideal-time-frequency-magnitude-masking-for-speech-separation-1809.07454</loc><lastmod>2019-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conv-tasnet-surpassing-ideal-time-frequency-magnitude-masking-for-speech-separation-1809.07454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conv-tasnet-surpassing-ideal-time-frequency-magnitude-masking-for-speech-separation-1809.07454"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-genre-transfer-with-cyclegan-1809.07575</loc><lastmod>2018-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-genre-transfer-with-cyclegan-1809.07575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-genre-transfer-with-cyclegan-1809.07575"/></url>
<url><loc>https://scifaro.com/en/abs/midi-vae-modeling-dynamics-and-instrumentation-of-music-with-applications-to-style-transfer-1809.07600</loc><lastmod>2018-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-vae-modeling-dynamics-and-instrumentation-of-music-with-applications-to-style-transfer-1809.07600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-vae-modeling-dynamics-and-instrumentation-of-music-with-applications-to-style-transfer-1809.07600"/></url>
<url><loc>https://scifaro.com/en/abs/attention-mechanism-in-speaker-recognition-what-does-it-learn-in-deep-speaker-embedding-1809.09311</loc><lastmod>2018-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-mechanism-in-speaker-recognition-what-does-it-learn-in-deep-speaker-embedding-1809.09311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-mechanism-in-speaker-recognition-what-does-it-learn-in-deep-speaker-embedding-1809.09311"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploration-of-mimic-architectures-for-residual-network-based-spectral-mapping-1809.09756</loc><lastmod>2018-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploration-of-mimic-architectures-for-residual-network-based-spectral-mapping-1809.09756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploration-of-mimic-architectures-for-residual-network-based-spectral-mapping-1809.09756"/></url>
<url><loc>https://scifaro.com/en/abs/an-extensible-cluster-graph-taxonomy-for-open-set-sound-scene-analysis-1809.10047</loc><lastmod>2018-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-extensible-cluster-graph-taxonomy-for-open-set-sound-scene-analysis-1809.10047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-extensible-cluster-graph-taxonomy-for-open-set-sound-scene-analysis-1809.10047"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-probing-for-estimating-the-storage-time-and-firmness-of-tomatoes-and-mandarin-oranges-1809.10581</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-probing-for-estimating-the-storage-time-and-firmness-of-tomatoes-and-mandarin-oranges-1809.10581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-probing-for-estimating-the-storage-time-and-firmness-of-tomatoes-and-mandarin-oranges-1809.10581"/></url>
<url><loc>https://scifaro.com/en/abs/online-localization-and-tracking-of-multiple-moving-speakers-in-reverberant-environments-1809.10936</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-localization-and-tracking-of-multiple-moving-speakers-in-reverberant-environments-1809.10936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-localization-and-tracking-of-multiple-moving-speakers-in-reverberant-environments-1809.10936"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-pass-phrase-verification-in-the-i-vector-space-1809.11068</loc><lastmod>2018-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-pass-phrase-verification-in-the-i-vector-space-1809.11068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-pass-phrase-verification-in-the-i-vector-space-1809.11068"/></url>
<url><loc>https://scifaro.com/en/abs/modulated-variational-auto-encoders-for-many-to-many-musical-timbre-transfer-1810.00222</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulated-variational-auto-encoders-for-many-to-many-musical-timbre-transfer-1810.00222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulated-variational-auto-encoders-for-many-to-many-musical-timbre-transfer-1810.00222"/></url>
<url><loc>https://scifaro.com/en/abs/eigentriads-and-eigenprogressions-on-the-tonnetz-1810.00790</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eigentriads-and-eigenprogressions-on-the-tonnetz-1810.00790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eigentriads-and-eigenprogressions-on-the-tonnetz-1810.00790"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-music-texture-transfer-system-1810.01248</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-music-texture-transfer-system-1810.01248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-music-texture-transfer-system-1810.01248"/></url>
<url><loc>https://scifaro.com/en/abs/phasebook-and-friends-leveraging-discrete-representations-for-source-separation-1810.01395</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phasebook-and-friends-leveraging-discrete-representations-for-source-separation-1810.01395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phasebook-and-friends-leveraging-discrete-representations-for-source-separation-1810.01395"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-approaches-for-understanding-simple-speech-commands-1810.02364</loc><lastmod>2018-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-approaches-for-understanding-simple-speech-commands-1810.02364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-approaches-for-understanding-simple-speech-commands-1810.02364"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-recurrent-latent-variable-model-for-music-composition-1810.03226</loc><lastmod>2018-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-recurrent-latent-variable-model-for-music-composition-1810.03226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-recurrent-latent-variable-model-for-music-composition-1810.03226"/></url>
<url><loc>https://scifaro.com/en/abs/sam-gcnn-a-gated-convolutional-neural-network-with-segment-level-attention-mechanism-for-home-activity-monitoring-1810.03986</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sam-gcnn-a-gated-convolutional-neural-network-with-segment-level-attention-mechanism-for-home-activity-monitoring-1810.03986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sam-gcnn-a-gated-convolutional-neural-network-with-segment-level-attention-mechanism-for-home-activity-monitoring-1810.03986"/></url>
<url><loc>https://scifaro.com/en/abs/tramp-tracking-by-a-real-time-ambisonic-based-particle-filter-1810.04080</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tramp-tracking-by-a-real-time-ambisonic-based-particle-filter-1810.04080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tramp-tracking-by-a-real-time-ambisonic-based-particle-filter-1810.04080"/></url>
<url><loc>https://scifaro.com/en/abs/current-trends-and-future-research-directions-for-interactive-music-1810.04276</loc><lastmod>2018-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/current-trends-and-future-research-directions-for-interactive-music-1810.04276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/current-trends-and-future-research-directions-for-interactive-music-1810.04276"/></url>
<url><loc>https://scifaro.com/en/abs/on-time-frequency-scattering-and-computer-music-1810.04506</loc><lastmod>2019-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-time-frequency-scattering-and-computer-music-1810.04506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-time-frequency-scattering-and-computer-music-1810.04506"/></url>
<url><loc>https://scifaro.com/en/abs/novel-cascaded-gaussian-mixture-model-deep-neural-network-classifier-for-speaker-identification-in-emotional-talking-environments-1810.04908</loc><lastmod>2018-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-cascaded-gaussian-mixture-model-deep-neural-network-classifier-for-speaker-identification-in-emotional-talking-environments-1810.04908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-cascaded-gaussian-mixture-model-deep-neural-network-classifier-for-speaker-identification-in-emotional-talking-environments-1810.04908"/></url>
<url><loc>https://scifaro.com/en/abs/listening-for-sirens-locating-and-classifying-acoustic-alarms-in-city-scenes-1810.04989</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-for-sirens-locating-and-classifying-acoustic-alarms-in-city-scenes-1810.04989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-for-sirens-locating-and-classifying-acoustic-alarms-in-city-scenes-1810.04989"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-acoustic-modeling-for-voice-conversion-1810.06865</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-acoustic-modeling-for-voice-conversion-1810.06865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-acoustic-modeling-for-voice-conversion-1810.06865"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-semi-supervised-data-with-gcrnns-vat-and-self-adaptive-label-refinement-1810.06897</loc><lastmod>2018-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-semi-supervised-data-with-gcrnns-vat-and-self-adaptive-label-refinement-1810.06897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-using-weakly-labeled-semi-supervised-data-with-gcrnns-vat-and-self-adaptive-label-refinement-1810.06897"/></url>
<url><loc>https://scifaro.com/en/abs/the-trajectory-of-voice-onset-time-with-vocal-aging-1810.07030</loc><lastmod>2018-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-trajectory-of-voice-onset-time-with-vocal-aging-1810.07030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-trajectory-of-voice-onset-time-with-vocal-aging-1810.07030"/></url>
<url><loc>https://scifaro.com/en/abs/a-database-linking-piano-and-orchestral-midi-scores-with-application-to-automatic-projective-orchestration-1810.08611</loc><lastmod>2018-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-database-linking-piano-and-orchestral-midi-scores-with-application-to-automatic-projective-orchestration-1810.08611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-database-linking-piano-and-orchestral-midi-scores-with-application-to-automatic-projective-orchestration-1810.08611"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-five-multiple-instance-learning-pooling-functions-for-sound-event-detection-with-weak-labeling-1810.09050</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-five-multiple-instance-learning-pooling-functions-for-sound-event-detection-with-weak-labeling-1810.09050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-five-multiple-instance-learning-pooling-functions-for-sound-event-detection-with-weak-labeling-1810.09050"/></url>
<url><loc>https://scifaro.com/en/abs/connectionist-temporal-localization-for-sound-event-detection-with-sequential-labeling-1810.09052</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/connectionist-temporal-localization-for-sound-event-detection-with-sequential-labeling-1810.09052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/connectionist-temporal-localization-for-sound-event-detection-with-sequential-labeling-1810.09052"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-monaural-front-end-processing-for-robust-asr-without-retraining-or-joint-training-1810.09067</loc><lastmod>2018-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-monaural-front-end-processing-for-robust-asr-without-retraining-or-joint-training-1810.09067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-monaural-front-end-processing-for-robust-asr-without-retraining-or-joint-training-1810.09067"/></url>
<url><loc>https://scifaro.com/en/abs/our-practice-of-using-machine-learning-to-recognize-species-by-voice-1810.09078</loc><lastmod>2018-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/our-practice-of-using-machine-learning-to-recognize-species-by-voice-1810.09078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/our-practice-of-using-machine-learning-to-recognize-species-by-voice-1810.09078"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-acoustic-identification-of-individual-animals-improving-generalisation-across-species-and-recording-conditions-1810.09273</loc><lastmod>2018-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-acoustic-identification-of-individual-animals-improving-generalisation-across-species-and-recording-conditions-1810.09273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-acoustic-identification-of-individual-animals-improving-generalisation-across-species-and-recording-conditions-1810.09273"/></url>
<url><loc>https://scifaro.com/en/abs/sing-symbol-to-instrument-neural-generator-1810.09785</loc><lastmod>2018-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sing-symbol-to-instrument-neural-generator-1810.09785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sing-symbol-to-instrument-neural-generator-1810.09785"/></url>
<url><loc>https://scifaro.com/en/abs/chord-recognition-in-symbolic-music-a-segmental-crf-model-segment-level-features-and-comparative-evaluations-on-classical-and-popular-music-1810.10002</loc><lastmod>2018-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-recognition-in-symbolic-music-a-segmental-crf-model-segment-level-features-and-comparative-evaluations-on-classical-and-popular-music-1810.10002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-recognition-in-symbolic-music-a-segmental-crf-model-segment-level-features-and-comparative-evaluations-on-classical-and-popular-music-1810.10002"/></url>
<url><loc>https://scifaro.com/en/abs/training-neural-audio-classifiers-with-few-data-1810.10274</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-neural-audio-classifiers-with-few-data-1810.10274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-neural-audio-classifiers-with-few-data-1810.10274"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-auto-encoder-for-speech-emotion-recognition-1810.10662</loc><lastmod>2018-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-auto-encoder-for-speech-emotion-recognition-1810.10662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-auto-encoder-for-speech-emotion-recognition-1810.10662"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-over-smoothness-in-speech-synthesis-using-generative-adversarial-networks-1810.10989</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-over-smoothness-in-speech-synthesis-using-generative-adversarial-networks-1810.10989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-over-smoothness-in-speech-synthesis-using-generative-adversarial-networks-1810.10989"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-pyramidal-fsmn-architecture-with-lattice-free-mmi-for-speech-recognition-1810.11352</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-pyramidal-fsmn-architecture-with-lattice-free-mmi-for-speech-recognition-1810.11352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-pyramidal-fsmn-architecture-with-lattice-free-mmi-for-speech-recognition-1810.11352"/></url>
<url><loc>https://scifaro.com/en/abs/spectrogram-channels-u-net-a-source-separation-model-viewing-each-channel-as-the-spectrogram-of-each-source-1810.11520</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrogram-channels-u-net-a-source-separation-model-viewing-each-channel-as-the-spectrogram-of-each-source-1810.11520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrogram-channels-u-net-a-source-separation-model-viewing-each-channel-as-the-spectrogram-of-each-source-1810.11520"/></url>
<url><loc>https://scifaro.com/en/abs/short-segment-heart-sound-classification-using-an-ensemble-of-deep-convolutional-neural-networks-1810.11573</loc><lastmod>2020-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/short-segment-heart-sound-classification-using-an-ensemble-of-deep-convolutional-neural-networks-1810.11573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/short-segment-heart-sound-classification-using-an-ensemble-of-deep-convolutional-neural-networks-1810.11573"/></url>
<url><loc>https://scifaro.com/en/abs/learning-how-to-listen-a-temporal-frequential-attention-model-for-sound-event-detection-1810.11939</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-how-to-listen-a-temporal-frequential-attention-model-for-sound-event-detection-1810.11939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-how-to-listen-a-temporal-frequential-attention-model-for-sound-event-detection-1810.11939"/></url>
<url><loc>https://scifaro.com/en/abs/improved-multipath-time-delay-estimation-using-cepstrum-subtraction-1810.11990</loc><lastmod>2018-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-multipath-time-delay-estimation-using-cepstrum-subtraction-1810.11990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-multipath-time-delay-estimation-using-cepstrum-subtraction-1810.11990"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-hybrid-ctc-attention-model-for-speech-recognition-1810.12020</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-hybrid-ctc-attention-model-for-speech-recognition-1810.12020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-hybrid-ctc-attention-model-for-speech-recognition-1810.12020"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-style-adaptation-in-text-to-speech-synthesis-using-sequence-to-sequence-models-with-attention-1810.12051</loc><lastmod>2018-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-style-adaptation-in-text-to-speech-synthesis-using-sequence-to-sequence-models-with-attention-1810.12051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-style-adaptation-in-text-to-speech-synthesis-using-sequence-to-sequence-models-with-attention-1810.12051"/></url>
<url><loc>https://scifaro.com/en/abs/audio-inpainting-of-music-by-means-of-neural-networks-1810.12138</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-inpainting-of-music-by-means-of-neural-networks-1810.12138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-inpainting-of-music-by-means-of-neural-networks-1810.12138"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-music-source-separation-is-it-possible-in-the-waveform-domain-1810.12187</loc><lastmod>2019-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-music-source-separation-is-it-possible-in-the-waveform-domain-1810.12187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-music-source-separation-is-it-possible-in-the-waveform-domain-1810.12187"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-factorized-piano-music-modeling-and-generation-with-the-maestro-dataset-1810.12247</loc><lastmod>2019-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-factorized-piano-music-modeling-and-generation-with-the-maestro-dataset-1810.12247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-factorized-piano-music-modeling-and-generation-with-the-maestro-dataset-1810.12247"/></url>
<url><loc>https://scifaro.com/en/abs/the-airbus-air-traffic-control-speech-recognition-2018-challenge-towards-atc-automatic-transcription-and-call-sign-detection-1810.12614</loc><lastmod>2020-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-airbus-air-traffic-control-speech-recognition-2018-challenge-towards-atc-automatic-transcription-and-call-sign-detection-1810.12614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-airbus-air-traffic-control-speech-recognition-2018-challenge-towards-atc-automatic-transcription-and-call-sign-detection-1810.12614"/></url>
<url><loc>https://scifaro.com/en/abs/subspectralnet-using-sub-spectrogram-based-convolutional-neural-networks-for-acoustic-scene-classification-1810.12642</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspectralnet-using-sub-spectrogram-based-convolutional-neural-networks-for-acoustic-scene-classification-1810.12642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspectralnet-using-sub-spectrogram-based-convolutional-neural-networks-for-acoustic-scene-classification-1810.12642"/></url>
<url><loc>https://scifaro.com/en/abs/feature-trajectory-dynamic-time-warping-for-clustering-of-speech-segments-1810.12722</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-trajectory-dynamic-time-warping-for-clustering-of-speech-segments-1810.12722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-trajectory-dynamic-time-warping-for-clustering-of-speech-segments-1810.12722"/></url>
<url><loc>https://scifaro.com/en/abs/audio-source-separation-using-variational-autoencoders-and-weak-class-supervision-1810.13104</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-source-separation-using-variational-autoencoders-and-weak-class-supervision-1810.13104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-source-separation-using-variational-autoencoders-and-weak-class-supervision-1810.13104"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-spain-sparse-audio-inpainter-1810.13137</loc><lastmod>2020-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-spain-sparse-audio-inpainter-1810.13137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-spain-sparse-audio-inpainter-1810.13137"/></url>
<url><loc>https://scifaro.com/en/abs/mulan-a-blind-and-off-grid-method-for-multichannel-echo-retrieval-1810.13338</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mulan-a-blind-and-off-grid-method-for-multichannel-echo-retrieval-1810.13338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mulan-a-blind-and-off-grid-method-for-multichannel-echo-retrieval-1810.13338"/></url>
<url><loc>https://scifaro.com/en/abs/waveglow-a-flow-based-generative-network-for-speech-synthesis-1811.00002</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveglow-a-flow-based-generative-network-for-speech-synthesis-1811.00002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveglow-a-flow-based-generative-network-for-speech-synthesis-1811.00002"/></url>
<url><loc>https://scifaro.com/en/abs/deep-net-features-for-complex-emotion-recognition-1811.00003</loc><lastmod>2018-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-net-features-for-complex-emotion-recognition-1811.00003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-net-features-for-complex-emotion-recognition-1811.00003"/></url>
<url><loc>https://scifaro.com/en/abs/on-single-channel-speech-enhancement-and-on-non-linear-modulation-domain-kalman-filtering-1811.00078</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-single-channel-speech-enhancement-and-on-non-linear-modulation-domain-kalman-filtering-1811.00078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-single-channel-speech-enhancement-and-on-non-linear-modulation-domain-kalman-filtering-1811.00078"/></url>
<url><loc>https://scifaro.com/en/abs/neural-music-synthesis-for-flexible-timbre-control-1811.00223</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-music-synthesis-for-flexible-timbre-control-1811.00223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-music-synthesis-for-flexible-timbre-control-1811.00223"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-crnn-system-for-sound-event-detection-with-large-scale-unlabeled-in-domain-data-1811.00301</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-crnn-system-for-sound-event-detection-with-large-scale-unlabeled-in-domain-data-1811.00301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-crnn-system-for-sound-event-detection-with-large-scale-unlabeled-in-domain-data-1811.00301"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-models-for-small-footprint-keyword-spotting-1811.00348</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-models-for-small-footprint-keyword-spotting-1811.00348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-models-for-small-footprint-keyword-spotting-1811.00348"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-models-with-auditory-attention-in-multi-channel-keyword-spotting-1811.00350</loc><lastmod>2018-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-models-with-auditory-attention-in-multi-channel-keyword-spotting-1811.00350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-models-with-auditory-attention-in-multi-channel-keyword-spotting-1811.00350"/></url>
<url><loc>https://scifaro.com/en/abs/referenceless-performance-evaluation-of-audio-source-separation-using-deep-neural-networks-1811.00454</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/referenceless-performance-evaluation-of-audio-source-separation-using-deep-neural-networks-1811.00454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/referenceless-performance-evaluation-of-audio-source-separation-using-deep-neural-networks-1811.00454"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-features-fusion-using-attentive-multi-channel-deep-architecture-1811.00936</loc><lastmod>2018-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-features-fusion-using-attentive-multi-channel-deep-architecture-1811.00936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-features-fusion-using-attentive-multi-channel-deep-architecture-1811.00936"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-equal-length-snippets-how-long-is-sufficient-to-recognize-an-audio-scene-1811.01095</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-equal-length-snippets-how-long-is-sufficient-to-recognize-an-audio-scene-1811.01095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-equal-length-snippets-how-long-is-sufficient-to-recognize-an-audio-scene-1811.01095"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-for-frame-level-instrument-recognition-1811.01143</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-for-frame-level-instrument-recognition-1811.01143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-for-frame-level-instrument-recognition-1811.01143"/></url>
<url><loc>https://scifaro.com/en/abs/deep-ad-hoc-beamforming-1811.01233</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-ad-hoc-beamforming-1811.01233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-ad-hoc-beamforming-1811.01233"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-networks-for-multi-channel-audio-classification-1811.01251</loc><lastmod>2019-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-networks-for-multi-channel-audio-classification-1811.01251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-networks-for-multi-channel-audio-classification-1811.01251"/></url>
<url><loc>https://scifaro.com/en/abs/convs2s-vc-fully-convolutional-sequence-to-sequence-voice-conversion-1811.01609</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convs2s-vc-fully-convolutional-sequence-to-sequence-voice-conversion-1811.01609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convs2s-vc-fully-convolutional-sequence-to-sequence-voice-conversion-1811.01609"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-sound-source-separation-conditioned-on-instrument-labels-1811.01850</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-sound-source-separation-conditioned-on-instrument-labels-1811.01850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-sound-source-separation-conditioned-on-instrument-labels-1811.01850"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-improve-your-speaker-embeddings-extractor-in-generic-toolkits-1811.02066</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-improve-your-speaker-embeddings-extractor-in-generic-toolkits-1811.02066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-improve-your-speaker-embeddings-extractor-in-generic-toolkits-1811.02066"/></url>
<url><loc>https://scifaro.com/en/abs/bootstrapping-single-channel-source-separation-via-unsupervised-spatial-clustering-on-stereo-mixtures-1811.02130</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bootstrapping-single-channel-source-separation-via-unsupervised-spatial-clustering-on-stereo-mixtures-1811.02130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bootstrapping-single-channel-source-separation-via-unsupervised-spatial-clustering-on-stereo-mixtures-1811.02130"/></url>
<url><loc>https://scifaro.com/en/abs/flowavenet-a-generative-flow-for-raw-audio-1811.02155</loc><lastmod>2019-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowavenet-a-generative-flow-for-raw-audio-1811.02155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowavenet-a-generative-flow-for-raw-audio-1811.02155"/></url>
<url><loc>https://scifaro.com/en/abs/nips4bplus-a-richly-annotated-birdsong-audio-dataset-1811.02275</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nips4bplus-a-richly-annotated-birdsong-audio-dataset-1811.02275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nips4bplus-a-richly-annotated-birdsong-audio-dataset-1811.02275"/></url>
<url><loc>https://scifaro.com/en/abs/user-specific-adaptation-in-automatic-transcription-of-vocalised-percussion-1811.02406</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/user-specific-adaptation-in-automatic-transcription-of-vocalised-percussion-1811.02406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/user-specific-adaptation-in-automatic-transcription-of-vocalised-percussion-1811.02406"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-only-method-for-advertisement-detection-in-broadcast-television-content-1811.02411</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-only-method-for-advertisement-detection-in-broadcast-television-content-1811.02411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-only-method-for-advertisement-detection-in-broadcast-television-content-1811.02411"/></url>
<url><loc>https://scifaro.com/en/abs/sdr-half-baked-or-well-done-1811.02508</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sdr-half-baked-or-well-done-1811.02508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sdr-half-baked-or-well-done-1811.02508"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-speech-stimuli-from-human-auditory-cortex-activity-using-a-wavenet-approach-1811.02694</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-speech-stimuli-from-human-auditory-cortex-activity-using-a-wavenet-approach-1811.02694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-speech-stimuli-from-human-auditory-cortex-activity-using-a-wavenet-approach-1811.02694"/></url>
<url><loc>https://scifaro.com/en/abs/class-conditional-embeddings-for-music-source-separation-1811.03076</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/class-conditional-embeddings-for-music-source-separation-1811.03076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/class-conditional-embeddings-for-music-source-separation-1811.03076"/></url>
<url><loc>https://scifaro.com/en/abs/learning-disentangled-representations-for-timber-and-pitch-in-music-audio-1811.03271</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-disentangled-representations-for-timber-and-pitch-in-music-audio-1811.03271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-disentangled-representations-for-timber-and-pitch-in-music-audio-1811.03271"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-recurrence-dynamics-for-speech-emotion-recognition-1811.04133</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-recurrence-dynamics-for-speech-emotion-recognition-1811.04133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-recurrence-dynamics-for-speech-emotion-recognition-1811.04133"/></url>
<url><loc>https://scifaro.com/en/abs/audio-spectrogram-factorization-for-classification-of-telephony-signals-below-the-auditory-threshold-1811.04139</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-spectrogram-factorization-for-classification-of-telephony-signals-below-the-auditory-threshold-1811.04139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-spectrogram-factorization-for-classification-of-telephony-signals-below-the-auditory-threshold-1811.04139"/></url>
<url><loc>https://scifaro.com/en/abs/performancenet-score-to-audio-music-generation-with-multi-band-convolutional-residual-network-1811.04357</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performancenet-score-to-audio-music-generation-with-multi-band-convolutional-residual-network-1811.04357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performancenet-score-to-audio-music-generation-with-multi-band-convolutional-residual-network-1811.04357"/></url>
<url><loc>https://scifaro.com/en/abs/multi-temporal-resolution-convolutional-neural-networks-for-acoustic-scene-classification-1811.04419</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-temporal-resolution-convolutional-neural-networks-for-acoustic-scene-classification-1811.04419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-temporal-resolution-convolutional-neural-networks-for-acoustic-scene-classification-1811.04419"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-modal-deep-neural-network-approach-to-bird-song-identification-1811.04448</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-modal-deep-neural-network-approach-to-bird-song-identification-1811.04448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-modal-deep-neural-network-approach-to-bird-song-identification-1811.04448"/></url>
<url><loc>https://scifaro.com/en/abs/vectorization-of-hypotheses-and-speech-for-faster-beam-search-in-encoder-decoder-based-speech-recognition-1811.04568</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vectorization-of-hypotheses-and-speech-for-faster-beam-search-in-encoder-decoder-based-speech-recognition-1811.04568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vectorization-of-hypotheses-and-speech-for-faster-beam-search-in-encoder-decoder-based-speech-recognition-1811.04568"/></url>
<url><loc>https://scifaro.com/en/abs/neural-wavetable-a-playable-wavetable-synthesizer-using-neural-networks-1811.05550</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-wavetable-a-playable-wavetable-synthesizer-using-neural-networks-1811.05550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-wavetable-a-playable-wavetable-synthesizer-using-neural-networks-1811.05550"/></url>
<url><loc>https://scifaro.com/en/abs/to-bee-or-not-to-bee-investigating-machine-learning-approaches-for-beehive-sound-recognition-1811.06016</loc><lastmod>2021-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-bee-or-not-to-bee-investigating-machine-learning-approaches-for-beehive-sound-recognition-1811.06016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-bee-or-not-to-bee-investigating-machine-learning-approaches-for-beehive-sound-recognition-1811.06016"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-identification-of-beehive-states-1811.06330</loc><lastmod>2019-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-identification-of-beehive-states-1811.06330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-identification-of-beehive-states-1811.06330"/></url>
<url><loc>https://scifaro.com/en/abs/generating-albums-with-samplernn-to-imitate-metal-rock-and-punk-bands-1811.06633</loc><lastmod>2018-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-albums-with-samplernn-to-imitate-metal-rock-and-punk-bands-1811.06633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-albums-with-samplernn-to-imitate-metal-rock-and-punk-bands-1811.06633"/></url>
<url><loc>https://scifaro.com/en/abs/generating-black-metal-and-math-rock-beyond-bach-beethoven-and-beatles-1811.06639</loc><lastmod>2018-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-black-metal-and-math-rock-beyond-bach-beethoven-and-beatles-1811.06639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-black-metal-and-math-rock-beyond-bach-beethoven-and-beatles-1811.06639"/></url>
<url><loc>https://scifaro.com/en/abs/aclnet-efficient-end-to-end-audio-classification-cnn-1811.06669</loc><lastmod>2018-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aclnet-efficient-end-to-end-audio-classification-cnn-1811.06669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aclnet-efficient-end-to-end-audio-classification-cnn-1811.06669"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-multichannel-speech-enhancement-with-variational-autoencoders-and-non-negative-matrix-factorization-1811.06713</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-multichannel-speech-enhancement-with-variational-autoencoders-and-non-negative-matrix-factorization-1811.06713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-multichannel-speech-enhancement-with-variational-autoencoders-and-non-negative-matrix-factorization-1811.06713"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-estimation-of-wide-band-signals-with-planar-microphone-arrays-1811.06756</loc><lastmod>2018-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-wide-band-signals-with-planar-microphone-arrays-1811.06756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-wide-band-signals-with-planar-microphone-arrays-1811.06756"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-tradeoffs-in-models-for-low-latency-speech-enhancement-1811.07030</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-tradeoffs-in-models-for-low-latency-speech-enhancement-1811.07030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-tradeoffs-in-models-for-low-latency-speech-enhancement-1811.07030"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-audio-tagging-with-sequentially-labelled-data-using-crnn-with-learnable-gated-linear-units-1811.07072</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-audio-tagging-with-sequentially-labelled-data-using-crnn-with-learnable-gated-linear-units-1811.07072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-audio-tagging-with-sequentially-labelled-data-using-crnn-with-learnable-gated-linear-units-1811.07072"/></url>
<url><loc>https://scifaro.com/en/abs/the-intrinsic-memorability-of-everyday-sounds-1811.07082</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-intrinsic-memorability-of-everyday-sounds-1811.07082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-intrinsic-memorability-of-everyday-sounds-1811.07082"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-recomposition-using-conditional-autoregressive-modeling-1811.07426</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-recomposition-using-conditional-autoregressive-modeling-1811.07426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-recomposition-using-conditional-autoregressive-modeling-1811.07426"/></url>
<url><loc>https://scifaro.com/en/abs/limitations-of-source-filter-coupling-in-phonation-1811.07435</loc><lastmod>2018-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/limitations-of-source-filter-coupling-in-phonation-1811.07435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/limitations-of-source-filter-coupling-in-phonation-1811.07435"/></url>
<url><loc>https://scifaro.com/en/abs/sound-stream-ii-towards-real-time-gesture-controlled-articulatory-sound-synthesis-1811.08029</loc><lastmod>2018-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-stream-ii-towards-real-time-gesture-controlled-articulatory-sound-synthesis-1811.08029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-stream-ii-towards-real-time-gesture-controlled-articulatory-sound-synthesis-1811.08029"/></url>
<url><loc>https://scifaro.com/en/abs/coupled-recurrent-models-for-polyphonic-music-composition-1811.08045</loc><lastmod>2019-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coupled-recurrent-models-for-polyphonic-music-composition-1811.08045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coupled-recurrent-models-for-polyphonic-music-composition-1811.08045"/></url>
<url><loc>https://scifaro.com/en/abs/improving-sequence-to-sequence-acoustic-modeling-by-adding-text-supervision-1811.08111</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-sequence-to-sequence-acoustic-modeling-by-adding-text-supervision-1811.08111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-sequence-to-sequence-acoustic-modeling-by-adding-text-supervision-1811.08111"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-explicit-structure-encoding-of-deep-neural-networks-for-symbolic-music-generation-1811.08380</loc><lastmod>2020-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-explicit-structure-encoding-of-deep-neural-networks-for-symbolic-music-generation-1811.08380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-explicit-structure-encoding-of-deep-neural-networks-for-symbolic-music-generation-1811.08380"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-consistency-constraints-for-improved-deep-speech-enhancement-1811.08521</loc><lastmod>2018-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-consistency-constraints-for-improved-deep-speech-enhancement-1811.08521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-consistency-constraints-for-improved-deep-speech-enhancement-1811.08521"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-phase-reconstruction-for-speaker-separation-a-trigonometric-perspective-1811.09010</loc><lastmod>2018-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-phase-reconstruction-for-speaker-separation-a-trigonometric-perspective-1811.09010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-phase-reconstruction-for-speaker-separation-a-trigonometric-perspective-1811.09010"/></url>
<url><loc>https://scifaro.com/en/abs/training-multi-task-adversarial-network-for-extracting-noise-robust-speaker-embedding-1811.09355</loc><lastmod>2019-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-multi-task-adversarial-network-for-extracting-noise-robust-speaker-embedding-1811.09355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-multi-task-adversarial-network-for-extracting-noise-robust-speaker-embedding-1811.09355"/></url>
<url><loc>https://scifaro.com/en/abs/improved-frequency-modulation-features-for-multichannel-distant-speech-recognition-1811.09381</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-frequency-modulation-features-for-multichannel-distant-speech-recognition-1811.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-frequency-modulation-features-for-multichannel-distant-speech-recognition-1811.09381"/></url>
<url><loc>https://scifaro.com/en/abs/towards-emotion-recognition-a-persistent-entropy-application-1811.09607</loc><lastmod>2019-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-emotion-recognition-a-persistent-entropy-application-1811.09607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-emotion-recognition-a-persistent-entropy-application-1811.09607"/></url>
<url><loc>https://scifaro.com/en/abs/timbretron-a-wavenet-cyclegan-cqt-audio-pipeline-for-musical-timbre-transfer-1811.09620</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbretron-a-wavenet-cyclegan-cqt-audio-pipeline-for-musical-timbre-transfer-1811.09620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbretron-a-wavenet-cyclegan-cqt-audio-pipeline-for-musical-timbre-transfer-1811.09620"/></url>
<url><loc>https://scifaro.com/en/abs/glottal-closure-instants-detection-from-pathological-acoustic-speech-signal-using-deep-learning-1811.09956</loc><lastmod>2018-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glottal-closure-instants-detection-from-pathological-acoustic-speech-signal-using-deep-learning-1811.09956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glottal-closure-instants-detection-from-pathological-acoustic-speech-signal-using-deep-learning-1811.09956"/></url>
<url><loc>https://scifaro.com/en/abs/learning-sound-events-from-webly-labeled-data-1811.09967</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-sound-events-from-webly-labeled-data-1811.09967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-sound-events-from-webly-labeled-data-1811.09967"/></url>
<url><loc>https://scifaro.com/en/abs/combining-high-level-features-of-raw-audio-waves-and-mel-spectrograms-for-audio-tagging-1811.10708</loc><lastmod>2018-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-high-level-features-of-raw-audio-waves-and-mel-spectrograms-for-audio-tagging-1811.10708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-high-level-features-of-raw-audio-waves-and-mel-spectrograms-for-audio-tagging-1811.10708"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speech-enhancement-with-the-wave-u-net-1811.11307</loc><lastmod>2018-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speech-enhancement-with-the-wave-u-net-1811.11307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speech-enhancement-with-the-wave-u-net-1811.11307"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-source-direction-of-arrival-estimation-using-subspace-pseudointensity-vectors-1811.11663</loc><lastmod>2018-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-source-direction-of-arrival-estimation-using-subspace-pseudointensity-vectors-1811.11663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-source-direction-of-arrival-estimation-using-subspace-pseudointensity-vectors-1811.11663"/></url>
<url><loc>https://scifaro.com/en/abs/ufans-u-shaped-fully-parallel-acoustic-neural-structure-for-statistical-parametric-speech-synthesis-with-20x-faster-1811.12208</loc><lastmod>2018-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ufans-u-shaped-fully-parallel-acoustic-neural-structure-for-statistical-parametric-speech-synthesis-with-20x-faster-1811.12208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ufans-u-shaped-fully-parallel-acoustic-neural-structure-for-statistical-parametric-speech-synthesis-with-20x-faster-1811.12208"/></url>
<url><loc>https://scifaro.com/en/abs/play-as-you-like-timbre-enhanced-multi-modal-music-style-transfer-1811.12214</loc><lastmod>2018-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/play-as-you-like-timbre-enhanced-multi-modal-music-style-transfer-1811.12214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/play-as-you-like-timbre-enhanced-multi-modal-music-style-transfer-1811.12214"/></url>
<url><loc>https://scifaro.com/en/abs/from-context-to-concept-exploring-semantic-relationships-in-music-with-word2vec-1811.12408</loc><lastmod>2018-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-context-to-concept-exploring-semantic-relationships-in-music-with-word2vec-1811.12408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-context-to-concept-exploring-semantic-relationships-in-music-with-word2vec-1811.12408"/></url>
<url><loc>https://scifaro.com/en/abs/bach2bach-generating-music-using-a-deep-reinforcement-learning-approach-1812.01060</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bach2bach-generating-music-using-a-deep-reinforcement-learning-approach-1812.01060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bach2bach-generating-music-using-a-deep-reinforcement-learning-approach-1812.01060"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-match-transient-sound-events-using-attentional-similarity-for-few-shot-sound-recognition-1812.01269</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-match-transient-sound-events-using-attentional-similarity-for-few-shot-sound-recognition-1812.01269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-match-transient-sound-events-using-attentional-similarity-for-few-shot-sound-recognition-1812.01269"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-separation-using-a-deep-convolutional-neural-network-trained-by-ideal-binary-mask-and-cross-entropy-1812.01278</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-separation-using-a-deep-convolutional-neural-network-trained-by-ideal-binary-mask-and-cross-entropy-1812.01278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-separation-using-a-deep-convolutional-neural-network-trained-by-ideal-binary-mask-and-cross-entropy-1812.01278"/></url>
<url><loc>https://scifaro.com/en/abs/localization-and-tracking-of-an-acoustic-source-using-a-diagonal-unloading-beamforming-and-a-kalman-filter-1812.01521</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-and-tracking-of-an-acoustic-source-using-a-diagonal-unloading-beamforming-and-a-kalman-filter-1812.01521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-and-tracking-of-an-acoustic-source-using-a-diagonal-unloading-beamforming-and-a-kalman-filter-1812.01521"/></url>
<url><loc>https://scifaro.com/en/abs/intensity-particle-flow-smc-phd-filter-for-audio-speaker-tracking-1812.01570</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intensity-particle-flow-smc-phd-filter-for-audio-speaker-tracking-1812.01570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intensity-particle-flow-smc-phd-filter-for-audio-speaker-tracking-1812.01570"/></url>
<url><loc>https://scifaro.com/en/abs/domain-mismatch-robust-acoustic-scene-classification-using-channel-information-conversion-1812.01731</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-mismatch-robust-acoustic-scene-classification-using-channel-information-conversion-1812.01731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-mismatch-robust-acoustic-scene-classification-using-channel-information-conversion-1812.01731"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-source-localization-based-on-modulation-domain-features-and-decision-pooling-1812.02399</loc><lastmod>2018-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-source-localization-based-on-modulation-domain-features-and-decision-pooling-1812.02399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-source-localization-based-on-modulation-domain-features-and-decision-pooling-1812.02399"/></url>
<url><loc>https://scifaro.com/en/abs/estimates-of-the-reconstruction-error-in-partially-redressed-warped-frames-expansions-1812.03279</loc><lastmod>2018-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimates-of-the-reconstruction-error-in-partially-redressed-warped-frames-expansions-1812.03279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimates-of-the-reconstruction-error-in-partially-redressed-warped-frames-expansions-1812.03279"/></url>
<url><loc>https://scifaro.com/en/abs/increase-apparent-public-speaking-fluency-by-speech-augmentation-1812.03415</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/increase-apparent-public-speaking-fluency-by-speech-augmentation-1812.03415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/increase-apparent-public-speaking-fluency-by-speech-augmentation-1812.03415"/></url>
<url><loc>https://scifaro.com/en/abs/a-computationally-efficient-and-practically-feasible-two-microphones-blind-speech-separation-method-1812.03914</loc><lastmod>2018-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computationally-efficient-and-practically-feasible-two-microphones-blind-speech-separation-method-1812.03914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computationally-efficient-and-practically-feasible-two-microphones-blind-speech-separation-method-1812.03914"/></url>
<url><loc>https://scifaro.com/en/abs/an-individualized-super-gaussian-single-microphone-speech-enhancement-for-hearing-aid-users-with-smartphone-as-an-assistive-device-1812.03916</loc><lastmod>2019-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-individualized-super-gaussian-single-microphone-speech-enhancement-for-hearing-aid-users-with-smartphone-as-an-assistive-device-1812.03916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-individualized-super-gaussian-single-microphone-speech-enhancement-for-hearing-aid-users-with-smartphone-as-an-assistive-device-1812.03916"/></url>
<url><loc>https://scifaro.com/en/abs/a-functional-taxonomy-of-music-generation-systems-1812.04186</loc><lastmod>2018-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-functional-taxonomy-of-music-generation-systems-1812.04186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-functional-taxonomy-of-music-generation-systems-1812.04186"/></url>
<url><loc>https://scifaro.com/en/abs/a-cascaded-multiple-speaker-localization-and-tracking-system-1812.04417</loc><lastmod>2018-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cascaded-multiple-speaker-localization-and-tracking-system-1812.04417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cascaded-multiple-speaker-localization-and-tracking-system-1812.04417"/></url>
<url><loc>https://scifaro.com/en/abs/morpheus-generating-structured-music-with-constrained-patterns-and-tension-1812.04832</loc><lastmod>2018-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/morpheus-generating-structured-music-with-constrained-patterns-and-tension-1812.04832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/morpheus-generating-structured-music-with-constrained-patterns-and-tension-1812.04832"/></url>
<url><loc>https://scifaro.com/en/abs/description-of-algorithms-for-ben-gurion-university-submission-to-the-locata-challenge-1812.04942</loc><lastmod>2018-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-of-algorithms-for-ben-gurion-university-submission-to-the-locata-challenge-1812.04942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-of-algorithms-for-ben-gurion-university-submission-to-the-locata-challenge-1812.04942"/></url>
<url><loc>https://scifaro.com/en/abs/parameterization-of-sequence-of-mfccs-for-dnn-based-voice-disorder-detection-1812.05888</loc><lastmod>2018-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameterization-of-sequence-of-mfccs-for-dnn-based-voice-disorder-detection-1812.05888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameterization-of-sequence-of-mfccs-for-dnn-based-voice-disorder-detection-1812.05888"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-an-open-source-implementation-of-the-srp-phat-algorithm-within-the-2018-locata-challenge-1812.05901</loc><lastmod>2018-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-an-open-source-implementation-of-the-srp-phat-algorithm-within-the-2018-locata-challenge-1812.05901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-an-open-source-implementation-of-the-srp-phat-algorithm-within-the-2018-locata-challenge-1812.05901"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-monaural-singing-voice-separation-with-a-masking-network-trained-on-synthetic-mixtures-1812.06087</loc><lastmod>2019-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-monaural-singing-voice-separation-with-a-masking-network-trained-on-synthetic-mixtures-1812.06087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-monaural-singing-voice-separation-with-a-masking-network-trained-on-synthetic-mixtures-1812.06087"/></url>
<url><loc>https://scifaro.com/en/abs/inversynth-deep-estimation-of-synthesizer-parameter-configurations-from-audio-signals-1812.06349</loc><lastmod>2019-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inversynth-deep-estimation-of-synthesizer-parameter-configurations-from-audio-signals-1812.06349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inversynth-deep-estimation-of-synthesizer-parameter-configurations-from-audio-signals-1812.06349"/></url>
<url><loc>https://scifaro.com/en/abs/voiceprint-recognition-of-parkinson-patients-based-on-deep-learning-1812.06613</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceprint-recognition-of-parkinson-patients-based-on-deep-learning-1812.06613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceprint-recognition-of-parkinson-patients-based-on-deep-learning-1812.06613"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-generate-music-with-bachprop-1812.06669</loc><lastmod>2020-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-generate-music-with-bachprop-1812.06669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-generate-music-with-bachprop-1812.06669"/></url>
<url><loc>https://scifaro.com/en/abs/circular-statistics-based-low-complexity-doa-estimation-for-hearing-aid-application-1812.06697</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/circular-statistics-based-low-complexity-doa-estimation-for-hearing-aid-application-1812.06697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/circular-statistics-based-low-complexity-doa-estimation-for-hearing-aid-application-1812.06697"/></url>
<url><loc>https://scifaro.com/en/abs/persian-vowel-recognition-with-mfcc-and-ann-on-pcvc-speech-dataset-1812.06953</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/persian-vowel-recognition-with-mfcc-and-ann-on-pcvc-speech-dataset-1812.06953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/persian-vowel-recognition-with-mfcc-and-ann-on-pcvc-speech-dataset-1812.06953"/></url>
<url><loc>https://scifaro.com/en/abs/instrument-independent-dastgah-recognition-of-iranian-classical-music-using-azarnet-1812.07017</loc><lastmod>2019-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instrument-independent-dastgah-recognition-of-iranian-classical-music-using-azarnet-1812.07017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instrument-independent-dastgah-recognition-of-iranian-classical-music-using-azarnet-1812.07017"/></url>
<url><loc>https://scifaro.com/en/abs/bandnet-a-neural-network-based-multi-instrument-beatles-style-midi-music-composition-machine-1812.07126</loc><lastmod>2018-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandnet-a-neural-network-based-multi-instrument-beatles-style-midi-music-composition-machine-1812.07126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandnet-a-neural-network-based-multi-instrument-beatles-style-midi-music-composition-machine-1812.07126"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoder-based-architecture-for-fast-real-time-audio-style-transfer-1812.07159</loc><lastmod>2018-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoder-based-architecture-for-fast-real-time-audio-style-transfer-1812.07159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoder-based-architecture-for-fast-real-time-audio-style-transfer-1812.07159"/></url>
<url><loc>https://scifaro.com/en/abs/uniform-convergence-bounds-for-codec-selection-1812.07568</loc><lastmod>2018-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniform-convergence-bounds-for-codec-selection-1812.07568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniform-convergence-bounds-for-codec-selection-1812.07568"/></url>
<url><loc>https://scifaro.com/en/abs/tracking-multiple-audio-sources-with-the-von-mises-distribution-and-variational-em-1812.08246</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tracking-multiple-audio-sources-with-the-von-mises-distribution-and-variational-em-1812.08246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tracking-multiple-audio-sources-with-the-von-mises-distribution-and-variational-em-1812.08246"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-online-dereverberation-based-on-spectral-magnitude-inverse-filtering-1812.08471</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-online-dereverberation-based-on-spectral-magnitude-inverse-filtering-1812.08471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-online-dereverberation-based-on-spectral-magnitude-inverse-filtering-1812.08471"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-supervector-extraction-for-encoding-speaker-and-phrase-information-in-text-dependent-speaker-verification-1812.09484</loc><lastmod>2018-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-supervector-extraction-for-encoding-speaker-and-phrase-information-in-text-dependent-speaker-verification-1812.09484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-supervector-extraction-for-encoding-speaker-and-phrase-information-in-text-dependent-speaker-verification-1812.09484"/></url>
<url><loc>https://scifaro.com/en/abs/noise-flooding-for-detecting-audio-adversarial-examples-against-automatic-speech-recognition-1812.10061</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-flooding-for-detecting-audio-adversarial-examples-against-automatic-speech-recognition-1812.10061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-flooding-for-detecting-audio-adversarial-examples-against-automatic-speech-recognition-1812.10061"/></url>
<url><loc>https://scifaro.com/en/abs/tensor-train-long-short-term-memory-for-monaural-speech-enhancement-1812.10095</loc><lastmod>2018-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tensor-train-long-short-term-memory-for-monaural-speech-enhancement-1812.10095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tensor-train-long-short-term-memory-for-monaural-speech-enhancement-1812.10095"/></url>
<url><loc>https://scifaro.com/en/abs/a-multiversion-programming-inspired-approach-to-detecting-audio-adversarial-examples-1812.10199</loc><lastmod>2019-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multiversion-programming-inspired-approach-to-detecting-audio-adversarial-examples-1812.10199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multiversion-programming-inspired-approach-to-detecting-audio-adversarial-examples-1812.10199"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-automated-pop-song-melody-generation-with-piano-accompaniment-arrangement-1812.10906</loc><lastmod>2018-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-automated-pop-song-melody-generation-with-piano-accompaniment-arrangement-1812.10906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-automated-pop-song-melody-generation-with-piano-accompaniment-arrangement-1812.10906"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-model-for-speech-enhancement-by-consistent-spectrogram-masking-1901.00295</loc><lastmod>2019-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-model-for-speech-enhancement-by-consistent-spectrogram-masking-1901.00295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-model-for-speech-enhancement-by-consistent-spectrogram-masking-1901.00295"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-enhancement-for-reverberated-and-noisy-signals-using-wide-residual-networks-1901.00660</loc><lastmod>2019-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-enhancement-for-reverberated-and-noisy-signals-using-wide-residual-networks-1901.00660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-enhancement-for-reverberated-and-noisy-signals-using-wide-residual-networks-1901.00660"/></url>
<url><loc>https://scifaro.com/en/abs/feature-reinforcement-with-word-embedding-and-parsing-information-in-neural-tts-1901.00707</loc><lastmod>2019-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-reinforcement-with-word-embedding-and-parsing-information-in-neural-tts-1901.00707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-reinforcement-with-word-embedding-and-parsing-information-in-neural-tts-1901.00707"/></url>
<url><loc>https://scifaro.com/en/abs/introduction-to-voice-presentation-attack-detection-and-recent-advances-1901.01085</loc><lastmod>2019-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introduction-to-voice-presentation-attack-detection-and-recent-advances-1901.01085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introduction-to-voice-presentation-attack-detection-and-recent-advances-1901.01085"/></url>
<url><loc>https://scifaro.com/en/abs/learning-sound-event-classifiers-from-web-audio-with-noisy-labels-1901.01189</loc><lastmod>2019-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-sound-event-classifiers-from-web-audio-with-noisy-labels-1901.01189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-sound-event-classifiers-from-web-audio-with-noisy-labels-1901.01189"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-sound-texture-in-cnn-based-acoustic-scene-classification-1901.01502</loc><lastmod>2019-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-sound-texture-in-cnn-based-acoustic-scene-classification-1901.01502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-sound-texture-in-cnn-based-acoustic-scene-classification-1901.01502"/></url>
<url><loc>https://scifaro.com/en/abs/sinusoidal-wave-generating-network-based-on-adversarial-learning-and-its-application-synthesizing-frog-sounds-for-data-augmentation-1901.02050</loc><lastmod>2019-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sinusoidal-wave-generating-network-based-on-adversarial-learning-and-its-application-synthesizing-frog-sounds-for-data-augmentation-1901.02050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sinusoidal-wave-generating-network-based-on-adversarial-learning-and-its-application-synthesizing-frog-sounds-for-data-augmentation-1901.02050"/></url>
<url><loc>https://scifaro.com/en/abs/presence-absence-estimation-in-audio-recordings-of-tropical-frog-communities-1901.02495</loc><lastmod>2019-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/presence-absence-estimation-in-audio-recordings-of-tropical-frog-communities-1901.02495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/presence-absence-estimation-in-audio-recordings-of-tropical-frog-communities-1901.02495"/></url>
<url><loc>https://scifaro.com/en/abs/cosine-similarity-penalty-to-discriminate-sound-classes-in-weakly-supervised-sound-event-detection-1901.03146</loc><lastmod>2019-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosine-similarity-penalty-to-discriminate-sound-classes-in-weakly-supervised-sound-event-detection-1901.03146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosine-similarity-penalty-to-discriminate-sound-classes-in-weakly-supervised-sound-event-detection-1901.03146"/></url>
<url><loc>https://scifaro.com/en/abs/ubiquitous-acoustic-sensing-on-commodity-iot-devices-a-survey-1901.03450</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ubiquitous-acoustic-sensing-on-commodity-iot-devices-a-survey-1901.03450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ubiquitous-acoustic-sensing-on-commodity-iot-devices-a-survey-1901.03450"/></url>
<url><loc>https://scifaro.com/en/abs/prototypical-metric-transfer-learning-for-continuous-speech-keyword-spotting-with-limited-training-data-1901.03860</loc><lastmod>2019-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prototypical-metric-transfer-learning-for-continuous-speech-keyword-spotting-with-limited-training-data-1901.03860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prototypical-metric-transfer-learning-for-continuous-speech-keyword-spotting-with-limited-training-data-1901.03860"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-for-the-recognition-of-emotion-in-the-speech-of-couples-in-psychotherapy-using-the-stanford-suppes-brain-lab-psychotherapy-dataset-1901.04110</loc><lastmod>2019-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-for-the-recognition-of-emotion-in-the-speech-of-couples-in-psychotherapy-using-the-stanford-suppes-brain-lab-psychotherapy-dataset-1901.04110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-for-the-recognition-of-emotion-in-the-speech-of-couples-in-psychotherapy-using-the-stanford-suppes-brain-lab-psychotherapy-dataset-1901.04110"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-transfer-learning-for-low-resource-emotional-tts-1901.04276</loc><lastmod>2019-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-transfer-learning-for-low-resource-emotional-tts-1901.04276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-transfer-learning-for-low-resource-emotional-tts-1901.04276"/></url>
<url><loc>https://scifaro.com/en/abs/music-artist-classification-with-convolutional-recurrent-neural-networks-1901.04555</loc><lastmod>2019-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-artist-classification-with-convolutional-recurrent-neural-networks-1901.04555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-artist-classification-with-convolutional-recurrent-neural-networks-1901.04555"/></url>
<url><loc>https://scifaro.com/en/abs/classical-music-generation-in-distinct-dastgahs-with-alimnet-acgan-1901.04696</loc><lastmod>2019-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classical-music-generation-in-distinct-dastgahs-with-alimnet-acgan-1901.04696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classical-music-generation-in-distinct-dastgahs-with-alimnet-acgan-1901.04696"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-based-persian-speech-recognition-1901.04699</loc><lastmod>2019-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-based-persian-speech-recognition-1901.04699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-based-persian-speech-recognition-1901.04699"/></url>
<url><loc>https://scifaro.com/en/abs/spectrogram-feature-losses-for-music-source-separation-1901.05061</loc><lastmod>2019-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrogram-feature-losses-for-music-source-separation-1901.05061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrogram-feature-losses-for-music-source-separation-1901.05061"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-using-gain-adapted-factorial-hidden-markov-models-1901.07604</loc><lastmod>2019-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-using-gain-adapted-factorial-hidden-markov-models-1901.07604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-using-gain-adapted-factorial-hidden-markov-models-1901.07604"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stream-network-with-temporal-attention-for-environmental-sound-classification-1901.08608</loc><lastmod>2019-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stream-network-with-temporal-attention-for-environmental-sound-classification-1901.08608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stream-network-with-temporal-attention-for-environmental-sound-classification-1901.08608"/></url>
<url><loc>https://scifaro.com/en/abs/bottom-up-broadcast-neural-network-for-music-genre-classification-1901.08928</loc><lastmod>2019-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bottom-up-broadcast-neural-network-for-music-genre-classification-1901.08928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bottom-up-broadcast-neural-network-for-music-genre-classification-1901.08928"/></url>
<url><loc>https://scifaro.com/en/abs/locata-challenge-speaker-localization-with-a-planar-array-1901.08983</loc><lastmod>2019-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locata-challenge-speaker-localization-with-a-planar-array-1901.08983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locata-challenge-speaker-localization-with-a-planar-array-1901.08983"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-joint-sdr-and-pesq-optimization-1901.09146</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-joint-sdr-and-pesq-optimization-1901.09146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-joint-sdr-and-pesq-optimization-1901.09146"/></url>
<url><loc>https://scifaro.com/en/abs/applying-visual-domain-style-transfer-and-texture-synthesis-techniques-to-audio-insights-and-challenges-1901.10240</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-visual-domain-style-transfer-and-texture-synthesis-techniques-to-audio-insights-and-challenges-1901.10240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-visual-domain-style-transfer-and-texture-synthesis-techniques-to-audio-insights-and-challenges-1901.10240"/></url>
<url><loc>https://scifaro.com/en/abs/discriminate-natural-versus-loudspeaker-emitted-speech-1901.11291</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminate-natural-versus-loudspeaker-emitted-speech-1901.11291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminate-natural-versus-loudspeaker-emitted-speech-1901.11291"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-the-area-under-the-roc-curve-using-neural-network-supervectors-for-text-dependent-speaker-verification-1901.11332</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-the-area-under-the-roc-curve-using-neural-network-supervectors-for-text-dependent-speaker-verification-1901.11332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-the-area-under-the-roc-curve-using-neural-network-supervectors-for-text-dependent-speaker-verification-1901.11332"/></url>
<url><loc>https://scifaro.com/en/abs/is-cqt-more-suitable-for-monaural-speech-separation-than-stft-an-empirical-study-1902.00631</loc><lastmod>2019-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-cqt-more-suitable-for-monaural-speech-separation-than-stft-an-empirical-study-1902.00631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-cqt-more-suitable-for-monaural-speech-separation-than-stft-an-empirical-study-1902.00631"/></url>
<url><loc>https://scifaro.com/en/abs/furcanet-an-end-to-end-deep-gated-convolutional-long-short-term-memory-deep-neural-networks-for-single-channel-speech-separation-1902.00651</loc><lastmod>2019-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/furcanet-an-end-to-end-deep-gated-convolutional-long-short-term-memory-deep-neural-networks-for-single-channel-speech-separation-1902.00651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/furcanet-an-end-to-end-deep-gated-convolutional-long-short-term-memory-deep-neural-networks-for-single-channel-speech-separation-1902.00651"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-using-graph-laplacian-regularization-based-on-event-co-occurrence-1902.00816</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-using-graph-laplacian-regularization-based-on-event-co-occurrence-1902.00816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-using-graph-laplacian-regularization-based-on-event-co-occurrence-1902.00816"/></url>
<url><loc>https://scifaro.com/en/abs/deep-autotuner-a-data-driven-approach-to-natural-sounding-pitch-correction-for-singing-voice-in-karaoke-performances-1902.00956</loc><lastmod>2019-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-autotuner-a-data-driven-approach-to-natural-sounding-pitch-correction-for-singing-voice-in-karaoke-performances-1902.00956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-autotuner-a-data-driven-approach-to-natural-sounding-pitch-correction-for-singing-voice-in-karaoke-performances-1902.00956"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-svm-based-approach-for-voice-activity-detection-1902.01544</loc><lastmod>2019-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-svm-based-approach-for-voice-activity-detection-1902.01544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-svm-based-approach-for-voice-activity-detection-1902.01544"/></url>
<url><loc>https://scifaro.com/en/abs/a-variance-modeling-framework-based-on-variational-autoencoders-for-speech-enhancement-1902.01605</loc><lastmod>2019-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variance-modeling-framework-based-on-variational-autoencoders-for-speech-enhancement-1902.01605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variance-modeling-framework-based-on-variational-autoencoders-for-speech-enhancement-1902.01605"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-music-composition-with-lstm-neural-networks-and-reinforcement-learning-1902.01973</loc><lastmod>2019-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-music-composition-with-lstm-neural-networks-and-reinforcement-learning-1902.01973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-music-composition-with-lstm-neural-networks-and-reinforcement-learning-1902.01973"/></url>
<url><loc>https://scifaro.com/en/abs/hide-and-speak-towards-deep-neural-networks-for-speech-steganography-1902.03083</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hide-and-speak-towards-deep-neural-networks-for-speech-steganography-1902.03083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hide-and-speak-towards-deep-neural-networks-for-speech-steganography-1902.03083"/></url>
<url><loc>https://scifaro.com/en/abs/generative-moment-matching-network-based-random-modulation-post-filter-for-dnn-based-singing-voice-synthesis-and-neural-double-tracking-1902.03389</loc><lastmod>2019-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-moment-matching-network-based-random-modulation-post-filter-for-dnn-based-singing-voice-synthesis-and-neural-double-tracking-1902.03389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-moment-matching-network-based-random-modulation-post-filter-for-dnn-based-singing-voice-synthesis-and-neural-double-tracking-1902.03389"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-variational-autoencoders-and-alpha-stable-distributions-1902.03926</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-variational-autoencoders-and-alpha-stable-distributions-1902.03926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-variational-autoencoders-and-alpha-stable-distributions-1902.03926"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-generation-of-time-frequency-features-with-application-in-audio-synthesis-1902.04072</loc><lastmod>2019-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-generation-of-time-frequency-features-with-application-in-audio-synthesis-1902.04072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-generation-of-time-frequency-features-with-application-in-audio-synthesis-1902.04072"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-for-polyphonic-piano-transcription-a-case-study-1902.04390</loc><lastmod>2019-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-for-polyphonic-piano-transcription-a-case-study-1902.04390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-for-polyphonic-piano-transcription-a-case-study-1902.04390"/></url>
<url><loc>https://scifaro.com/en/abs/furcanext-end-to-end-monaural-speech-separation-with-dynamic-gated-dilated-temporal-convolutional-networks-1902.04891</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/furcanext-end-to-end-monaural-speech-separation-with-dynamic-gated-dilated-temporal-convolutional-networks-1902.04891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/furcanext-end-to-end-monaural-speech-separation-with-dynamic-gated-dilated-temporal-convolutional-networks-1902.04891"/></url>
<url><loc>https://scifaro.com/en/abs/improving-performance-and-inference-on-audio-classification-tasks-using-capsule-networks-1902.05069</loc><lastmod>2019-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-performance-and-inference-on-audio-classification-tasks-using-capsule-networks-1902.05069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-performance-and-inference-on-audio-classification-tasks-using-capsule-networks-1902.05069"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-robot-speech-recognition-using-biomimetic-binaural-sound-source-localization-1902.05446</loc><lastmod>2019-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-robot-speech-recognition-using-biomimetic-binaural-sound-source-localization-1902.05446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-robot-speech-recognition-using-biomimetic-binaural-sound-source-localization-1902.05446"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-uncertainty-propagation-method-for-robust-i-vector-based-speaker-recognition-1902.05761</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-uncertainty-propagation-method-for-robust-i-vector-based-speaker-recognition-1902.05761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-uncertainty-propagation-method-for-robust-i-vector-based-speaker-recognition-1902.05761"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-lyrics-alignment-for-polyphonic-music-using-an-audio-to-character-recognition-model-1902.06797</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-lyrics-alignment-for-polyphonic-music-using-an-audio-to-character-recognition-model-1902.06797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-lyrics-alignment-for-polyphonic-music-using-an-audio-to-character-recognition-model-1902.06797"/></url>
<url><loc>https://scifaro.com/en/abs/p-reverb-perceptual-characterization-of-early-and-late-reflections-for-auditory-displays-1902.06880</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/p-reverb-perceptual-characterization-of-early-and-late-reflections-for-auditory-displays-1902.06880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/p-reverb-perceptual-characterization-of-early-and-late-reflections-for-auditory-displays-1902.06880"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-deep-clustering-for-speech-separation-1902.07033</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-deep-clustering-for-speech-separation-1902.07033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-deep-clustering-for-speech-separation-1902.07033"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-voice-cloning-for-neural-singing-synthesis-1902.07292</loc><lastmod>2019-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-voice-cloning-for-neural-singing-synthesis-1902.07292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-voice-cloning-for-neural-singing-synthesis-1902.07292"/></url>
<url><loc>https://scifaro.com/en/abs/audio-linguistic-embeddings-for-spoken-sentences-1902.07817</loc><lastmod>2019-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-linguistic-embeddings-for-spoken-sentences-1902.07817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-linguistic-embeddings-for-spoken-sentences-1902.07817"/></url>
<url><loc>https://scifaro.com/en/abs/the-nigens-general-sound-events-database-1902.08314</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nigens-general-sound-events-database-1902.08314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nigens-general-sound-events-database-1902.08314"/></url>
<url><loc>https://scifaro.com/en/abs/gansynth-adversarial-neural-audio-synthesis-1902.08710</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gansynth-adversarial-neural-audio-synthesis-1902.08710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gansynth-adversarial-neural-audio-synthesis-1902.08710"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-and-compression-for-passive-acoustic-monitoring-of-the-african-forest-elephant-1902.09069</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-and-compression-for-passive-acoustic-monitoring-of-the-african-forest-elephant-1902.09069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-and-compression-for-passive-acoustic-monitoring-of-the-african-forest-elephant-1902.09069"/></url>
<url><loc>https://scifaro.com/en/abs/robust-sound-source-localization-considering-similarity-of-back-propagation-signals-1902.09179</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-sound-source-localization-considering-similarity-of-back-propagation-signals-1902.09179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-sound-source-localization-considering-similarity-of-back-propagation-signals-1902.09179"/></url>
<url><loc>https://scifaro.com/en/abs/audio-caption-listen-and-tell-1902.09254</loc><lastmod>2020-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-caption-listen-and-tell-1902.09254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-caption-listen-and-tell-1902.09254"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-multi-layer-temporal-pooling-based-on-convolutional-neural-network-1902.10063</loc><lastmod>2019-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-multi-layer-temporal-pooling-based-on-convolutional-neural-network-1902.10063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-multi-layer-temporal-pooling-based-on-convolutional-neural-network-1902.10063"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-neural-architecture-for-instrumental-audio-tasks-1903.00142</loc><lastmod>2019-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-neural-architecture-for-instrumental-audio-tasks-1903.00142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-neural-architecture-for-instrumental-audio-tasks-1903.00142"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-labelled-audioset-tagging-with-attention-neural-networks-1903.00765</loc><lastmod>2019-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-labelled-audioset-tagging-with-attention-neural-networks-1903.00765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-labelled-audioset-tagging-with-attention-neural-networks-1903.00765"/></url>
<url><loc>https://scifaro.com/en/abs/traditional-machine-learning-for-pitch-detection-1903.01290</loc><lastmod>2019-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/traditional-machine-learning-for-pitch-detection-1903.01290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/traditional-machine-learning-for-pitch-detection-1903.01290"/></url>
<url><loc>https://scifaro.com/en/abs/improving-singing-voice-separation-using-deep-u-net-and-wave-u-net-with-data-augmentation-1903.01415</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-singing-voice-separation-using-deep-u-net-and-wave-u-net-with-data-augmentation-1903.01415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-singing-voice-separation-using-deep-u-net-and-wave-u-net-with-data-augmentation-1903.01415"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-drum-transcription-with-convolutional-neural-networks-1903.01416</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-drum-transcription-with-convolutional-neural-networks-1903.01416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-drum-transcription-with-convolutional-neural-networks-1903.01416"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-visibility-graphs-application-to-similarity-of-harmonic-signals-1903.01976</loc><lastmod>2019-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-visibility-graphs-application-to-similarity-of-harmonic-signals-1903.01976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-visibility-graphs-application-to-similarity-of-harmonic-signals-1903.01976"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-music-features-by-knowledge-transfer-from-user-item-log-data-1903.02794</loc><lastmod>2019-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-music-features-by-knowledge-transfer-from-user-item-log-data-1903.02794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-music-features-by-knowledge-transfer-from-user-item-log-data-1903.02794"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-merging-source-and-filter-based-information-1903.02844</loc><lastmod>2019-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-merging-source-and-filter-based-information-1903.02844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-merging-source-and-filter-based-information-1903.02844"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-deep-complex-u-net-1903.03107</loc><lastmod>2019-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-deep-complex-u-net-1903.03107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-deep-complex-u-net-1903.03107"/></url>
<url><loc>https://scifaro.com/en/abs/the-life-of-a-new-york-city-noise-sensor-network-1903.03195</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-life-of-a-new-york-city-noise-sensor-network-1903.03195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-life-of-a-new-york-city-noise-sensor-network-1903.03195"/></url>
<url><loc>https://scifaro.com/en/abs/fast-multichannel-source-separation-based-on-jointly-diagonalizable-spatial-covariance-matrices-1903.03237</loc><lastmod>2019-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-multichannel-source-separation-based-on-jointly-diagonalizable-spatial-covariance-matrices-1903.03237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-multichannel-source-separation-based-on-jointly-diagonalizable-spatial-covariance-matrices-1903.03237"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-generative-model-of-speech-complex-spectrograms-1903.03269</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-generative-model-of-speech-complex-spectrograms-1903.03269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-generative-model-of-speech-complex-spectrograms-1903.03269"/></url>
<url><loc>https://scifaro.com/en/abs/deep-griffin-lim-iteration-1903.03971</loc><lastmod>2019-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-griffin-lim-iteration-1903.03971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-griffin-lim-iteration-1903.03971"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-generative-adversarial-binary-networks-for-music-generation-1903.04722</loc><lastmod>2019-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-generative-adversarial-binary-networks-for-music-generation-1903.04722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-generative-adversarial-binary-networks-for-music-generation-1903.04722"/></url>
<url><loc>https://scifaro.com/en/abs/a-vocoder-based-method-for-singing-voice-extraction-1903.07554</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-vocoder-based-method-for-singing-voice-extraction-1903.07554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-vocoder-based-method-for-singing-voice-extraction-1903.07554"/></url>
<url><loc>https://scifaro.com/en/abs/smart-edition-of-midi-files-1903.08459</loc><lastmod>2019-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smart-edition-of-midi-files-1903.08459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smart-edition-of-midi-files-1903.08459"/></url>
<url><loc>https://scifaro.com/en/abs/craft-a-multifunction-online-platform-for-speech-prosody-visualisation-1903.08718</loc><lastmod>2019-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/craft-a-multifunction-online-platform-for-speech-prosody-visualisation-1903.08718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/craft-a-multifunction-online-platform-for-speech-prosody-visualisation-1903.08718"/></url>
<url><loc>https://scifaro.com/en/abs/improving-machine-hearing-on-limited-data-sets-1903.08950</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-machine-hearing-on-limited-data-sets-1903.08950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-machine-hearing-on-limited-data-sets-1903.08950"/></url>
<url><loc>https://scifaro.com/en/abs/bandwidth-extension-on-raw-audio-via-generative-adversarial-networks-1903.09027</loc><lastmod>2019-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandwidth-extension-on-raw-audio-via-generative-adversarial-networks-1903.09027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandwidth-extension-on-raw-audio-via-generative-adversarial-networks-1903.09027"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-enhancement-based-on-multichannel-nmf-informed-beamforming-for-noise-robust-automatic-speech-recognition-1903.09341</loc><lastmod>2019-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-based-on-multichannel-nmf-informed-beamforming-for-noise-robust-automatic-speech-recognition-1903.09341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-based-on-multichannel-nmf-informed-beamforming-for-noise-robust-automatic-speech-recognition-1903.09341"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-based-on-third-order-circular-suprasegmental-hidden-markov-model-1903.09803</loc><lastmod>2019-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-based-on-third-order-circular-suprasegmental-hidden-markov-model-1903.09803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-based-on-third-order-circular-suprasegmental-hidden-markov-model-1903.09803"/></url>
<url><loc>https://scifaro.com/en/abs/conditioning-a-recurrent-neural-network-to-synthesize-musical-instrument-transients-1903.10703</loc><lastmod>2019-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioning-a-recurrent-neural-network-to-synthesize-musical-instrument-transients-1903.10703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioning-a-recurrent-neural-network-to-synthesize-musical-instrument-transients-1903.10703"/></url>
<url><loc>https://scifaro.com/en/abs/wgansing-a-multi-voice-singing-voice-synthesizer-based-on-the-wasserstein-gan-1903.10729</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wgansing-a-multi-voice-singing-voice-synthesizer-based-on-the-wasserstein-gan-1903.10729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wgansing-a-multi-voice-singing-voice-synthesizer-based-on-the-wasserstein-gan-1903.10729"/></url>
<url><loc>https://scifaro.com/en/abs/musical-tempo-and-key-estimation-using-convolutional-neural-networks-with-directional-filters-1903.10839</loc><lastmod>2019-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-tempo-and-key-estimation-using-convolutional-neural-networks-with-directional-filters-1903.10839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-tempo-and-key-estimation-using-convolutional-neural-networks-with-directional-filters-1903.10839"/></url>
<url><loc>https://scifaro.com/en/abs/muse-ing-on-the-impact-of-utterance-ordering-on-crowdsourced-emotion-annotations-1903.11672</loc><lastmod>2019-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muse-ing-on-the-impact-of-utterance-ordering-on-crowdsourced-emotion-annotations-1903.11672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muse-ing-on-the-impact-of-utterance-ordering-on-crowdsourced-emotion-annotations-1903.11672"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-pooling-structure-for-weakly-labeled-sound-event-detection-1903.11791</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-pooling-structure-for-weakly-labeled-sound-event-detection-1903.11791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-pooling-structure-for-weakly-labeled-sound-event-detection-1903.11791"/></url>
<url><loc>https://scifaro.com/en/abs/joining-sound-event-detection-and-localization-through-spatial-segregation-1904.00055</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joining-sound-event-detection-and-localization-through-spatial-segregation-1904.00055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joining-sound-event-detection-and-localization-through-spatial-segregation-1904.00055"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-time-frequency-attention-for-acoustic-event-detection-1904.00063</loc><lastmod>2019-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-time-frequency-attention-for-acoustic-event-detection-1904.00063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-time-frequency-attention-for-acoustic-event-detection-1904.00063"/></url>
<url><loc>https://scifaro.com/en/abs/static-visual-spatial-priors-for-doa-estimation-1904.00202</loc><lastmod>2019-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/static-visual-spatial-priors-for-doa-estimation-1904.00202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/static-visual-spatial-priors-for-doa-estimation-1904.00202"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-training-of-neural-mask-based-beamforming-1904.01578</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-training-of-neural-mask-based-beamforming-1904.01578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-training-of-neural-mask-based-beamforming-1904.01578"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-binaural-sound-localisation-from-the-raw-waveform-1904.01916</loc><lastmod>2019-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-binaural-sound-localisation-from-the-raw-waveform-1904.01916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-binaural-sound-localisation-from-the-raw-waveform-1904.01916"/></url>
<url><loc>https://scifaro.com/en/abs/gedi-gammachirp-envelope-distortion-index-for-predicting-intelligibility-of-enhanced-speech-1904.02096</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gedi-gammachirp-envelope-distortion-index-for-predicting-intelligibility-of-enhanced-speech-1904.02096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gedi-gammachirp-envelope-distortion-index-for-predicting-intelligibility-of-enhanced-speech-1904.02096"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-blind-source-separation-with-microphones-and-blinkies-1904.02334</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-blind-source-separation-with-microphones-and-blinkies-1904.02334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-blind-source-separation-with-microphones-and-blinkies-1904.02334"/></url>
<url><loc>https://scifaro.com/en/abs/libritts-a-corpus-derived-from-librispeech-for-text-to-speech-1904.02882</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libritts-a-corpus-derived-from-librispeech-for-text-to-speech-1904.02882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libritts-a-corpus-derived-from-librispeech-for-text-to-speech-1904.02882"/></url>
<url><loc>https://scifaro.com/en/abs/wavecyclegan2-time-domain-neural-post-filter-for-speech-waveform-generation-1904.02892</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavecyclegan2-time-domain-neural-post-filter-for-speech-waveform-generation-1904.02892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavecyclegan2-time-domain-neural-post-filter-for-speech-waveform-generation-1904.02892"/></url>
<url><loc>https://scifaro.com/en/abs/recursive-speech-separation-for-unknown-number-of-speakers-1904.03065</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recursive-speech-separation-for-unknown-number-of-speakers-1904.03065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recursive-speech-separation-for-unknown-number-of-speakers-1904.03065"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generalized-speech-enhancement-with-generative-adversarial-networks-1904.03418</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generalized-speech-enhancement-with-generative-adversarial-networks-1904.03418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generalized-speech-enhancement-with-generative-adversarial-networks-1904.03418"/></url>
<url><loc>https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-and-spatial-localization-dcase-2019-baseline-systems-1904.03476</loc><lastmod>2019-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-and-spatial-localization-dcase-2019-baseline-systems-1904.03476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-and-spatial-localization-dcase-2019-baseline-systems-1904.03476"/></url>
<url><loc>https://scifaro.com/en/abs/large-margin-softmax-loss-for-speaker-verification-1904.03479</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-margin-softmax-loss-for-speaker-verification-1904.03479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-margin-softmax-loss-for-speaker-verification-1904.03479"/></url>
<url><loc>https://scifaro.com/en/abs/taco-vc-a-single-speaker-tacotron-based-voice-conversion-with-limited-data-1904.03522</loc><lastmod>2020-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taco-vc-a-single-speaker-tacotron-based-voice-conversion-with-limited-data-1904.03522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taco-vc-a-single-speaker-tacotron-based-voice-conversion-with-limited-data-1904.03522"/></url>
<url><loc>https://scifaro.com/en/abs/spatio-temporal-attention-pooling-for-audio-scene-classification-1904.03543</loc><lastmod>2019-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatio-temporal-attention-pooling-for-audio-scene-classification-1904.03543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatio-temporal-attention-pooling-for-audio-scene-classification-1904.03543"/></url>
<url><loc>https://scifaro.com/en/abs/vae-based-regularization-for-deep-speaker-embedding-1904.03617</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vae-based-regularization-for-deep-speaker-embedding-1904.03617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vae-based-regularization-for-deep-speaker-embedding-1904.03617"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-non-parametric-multi-source-modelling-based-determined-blind-source-separation-1904.03787</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-non-parametric-multi-source-modelling-based-determined-blind-source-separation-1904.03787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-non-parametric-multi-source-modelling-based-determined-blind-source-separation-1904.03787"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-convolution-for-real-time-keyword-spotting-on-mobile-devices-1904.03814</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-convolution-for-real-time-keyword-spotting-on-mobile-devices-1904.03814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-convolution-for-real-time-keyword-spotting-on-mobile-devices-1904.03814"/></url>
<url><loc>https://scifaro.com/en/abs/direct-modelling-of-speech-emotion-from-raw-speech-1904.03833</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direct-modelling-of-speech-emotion-from-raw-speech-1904.03833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direct-modelling-of-speech-emotion-from-raw-speech-1904.03833"/></url>
<url><loc>https://scifaro.com/en/abs/duration-robust-weakly-supervised-sound-event-detection-1904.03841</loc><lastmod>2020-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/duration-robust-weakly-supervised-sound-event-detection-1904.03841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/duration-robust-weakly-supervised-sound-event-detection-1904.03841"/></url>
<url><loc>https://scifaro.com/en/abs/crossmodal-voice-conversion-1904.04540</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossmodal-voice-conversion-1904.04540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossmodal-voice-conversion-1904.04540"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-vc2-improved-cyclegan-based-non-parallel-voice-conversion-1904.04631</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-vc2-improved-cyclegan-based-non-parallel-voice-conversion-1904.04631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-vc2-improved-cyclegan-based-non-parallel-voice-conversion-1904.04631"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-deep-learning-strategies-for-automatic-speech-recognition-1904.04956</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-deep-learning-strategies-for-automatic-speech-recognition-1904.04956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-deep-learning-strategies-for-automatic-speech-recognition-1904.04956"/></url>
<url><loc>https://scifaro.com/en/abs/an-interactive-musical-prediction-system-with-mixture-density-recurrent-neural-networks-1904.05009</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-interactive-musical-prediction-system-with-mixture-density-recurrent-neural-networks-1904.05009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-interactive-musical-prediction-system-with-mixture-density-recurrent-neural-networks-1904.05009"/></url>
<url><loc>https://scifaro.com/en/abs/neuralogram-a-deep-neural-network-based-representation-for-audio-signals-1904.05073</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuralogram-a-deep-neural-network-based-representation-for-audio-signals-1904.05073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuralogram-a-deep-neural-network-based-representation-for-audio-signals-1904.05073"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-multi-f0-modeling-in-satb-choir-recordings-1904.05086</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-multi-f0-modeling-in-satb-choir-recordings-1904.05086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-multi-f0-modeling-in-satb-choir-recordings-1904.05086"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-by-implicitly-identifying-distinct-sound-events-1904.05204</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-by-implicitly-identifying-distinct-sound-events-1904.05204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-by-implicitly-identifying-distinct-sound-events-1904.05204"/></url>
<url><loc>https://scifaro.com/en/abs/a-compact-and-discriminative-feature-based-on-auditory-summary-statistics-for-acoustic-scene-classification-1904.05243</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-compact-and-discriminative-feature-based-on-auditory-summary-statistics-for-acoustic-scene-classification-1904.05243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-compact-and-discriminative-feature-based-on-auditory-summary-statistics-for-acoustic-scene-classification-1904.05243"/></url>
<url><loc>https://scifaro.com/en/abs/expectation-maximization-for-speech-source-separation-using-convolutive-transfer-function-1904.05249</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expectation-maximization-for-speech-source-separation-using-convolutive-transfer-function-1904.05249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expectation-maximization-for-speech-source-separation-using-convolutive-transfer-function-1904.05249"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoder-based-articulatory-to-acoustic-mapping-for-ultrasound-silent-speech-interfaces-1904.05259</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoder-based-articulatory-to-acoustic-mapping-for-ultrasound-silent-speech-interfaces-1904.05259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoder-based-articulatory-to-acoustic-mapping-for-ultrasound-silent-speech-interfaces-1904.05259"/></url>
<url><loc>https://scifaro.com/en/abs/stc-antispoofing-systems-for-the-asvspoof2019-challenge-1904.05576</loc><lastmod>2019-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stc-antispoofing-systems-for-the-asvspoof2019-challenge-1904.05576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stc-antispoofing-systems-for-the-asvspoof2019-challenge-1904.05576"/></url>
<url><loc>https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-spatial-localization-dcase-2019-baseline-systems-1904.05635</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-spatial-localization-dcase-2019-baseline-systems-1904.05635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-task-learning-for-audio-tagging-sound-event-detection-spatial-localization-dcase-2019-baseline-systems-1904.05635"/></url>
<url><loc>https://scifaro.com/en/abs/rnn-based-speech-synthesis-using-a-continuous-sinusoidal-model-1904.06075</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rnn-based-speech-synthesis-using-a-continuous-sinusoidal-model-1904.06075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rnn-based-speech-synthesis-using-a-continuous-sinusoidal-model-1904.06075"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-acoustic-to-articulatory-inversion-using-ultrasound-tongue-imaging-1904.06083</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-acoustic-to-articulatory-inversion-using-ultrasound-tongue-imaging-1904.06083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-acoustic-to-articulatory-inversion-using-ultrasound-tongue-imaging-1904.06083"/></url>
<url><loc>https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-voices-from-a-distance-challenge-1904.06093</loc><lastmod>2019-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-voices-from-a-distance-challenge-1904.06093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-voices-from-a-distance-challenge-1904.06093"/></url>
<url><loc>https://scifaro.com/en/abs/assisted-sound-sample-generation-with-musical-conditioning-in-adversarial-auto-encoders-1904.06215</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assisted-sound-sample-generation-with-musical-conditioning-in-adversarial-auto-encoders-1904.06215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assisted-sound-sample-generation-with-musical-conditioning-in-adversarial-auto-encoders-1904.06215"/></url>
<url><loc>https://scifaro.com/en/abs/proximal-binaural-sound-can-induce-subjective-frisson-1904.06851</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proximal-binaural-sound-can-induce-subjective-frisson-1904.06851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proximal-binaural-sound-can-induce-subjective-frisson-1904.06851"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-by-accumulating-per-frequency-modeling-fluctuations-1904.07612</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-by-accumulating-per-frequency-modeling-fluctuations-1904.07612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-by-accumulating-per-frequency-modeling-fluctuations-1904.07612"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speech-separation-with-time-and-frequency-cross-domain-joint-embedding-and-clustering-1904.07845</loc><lastmod>2019-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speech-separation-with-time-and-frequency-cross-domain-joint-embedding-and-clustering-1904.07845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speech-separation-with-time-and-frequency-cross-domain-joint-embedding-and-clustering-1904.07845"/></url>
<url><loc>https://scifaro.com/en/abs/expediting-tts-synthesis-with-adversarial-vocoding-1904.07944</loc><lastmod>2019-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expediting-tts-synthesis-with-adversarial-vocoding-1904.07944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expediting-tts-synthesis-with-adversarial-vocoding-1904.07944"/></url>
<url><loc>https://scifaro.com/en/abs/hard-sample-mining-for-the-improved-retraining-of-automatic-speech-recognition-1904.08031</loc><lastmod>2019-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hard-sample-mining-for-the-improved-retraining-of-automatic-speech-recognition-1904.08031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hard-sample-mining-for-the-improved-retraining-of-automatic-speech-recognition-1904.08031"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-task-learning-framework-for-overcoming-the-catastrophic-forgetting-in-automatic-speech-recognition-1904.08039</loc><lastmod>2019-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-task-learning-framework-for-overcoming-the-catastrophic-forgetting-in-automatic-speech-recognition-1904.08039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-task-learning-framework-for-overcoming-the-catastrophic-forgetting-in-automatic-speech-recognition-1904.08039"/></url>
<url><loc>https://scifaro.com/en/abs/mosnet-deep-learning-based-objective-assessment-for-voice-conversion-1904.08352</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mosnet-deep-learning-based-objective-assessment-for-voice-conversion-1904.08352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mosnet-deep-learning-based-objective-assessment-for-voice-conversion-1904.08352"/></url>
<url><loc>https://scifaro.com/en/abs/deep-filtering-signal-extraction-and-reconstruction-using-complex-time-frequency-filters-1904.08369</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-filtering-signal-extraction-and-reconstruction-using-complex-time-frequency-filters-1904.08369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-filtering-signal-extraction-and-reconstruction-using-complex-time-frequency-filters-1904.08369"/></url>
<url><loc>https://scifaro.com/en/abs/regression-and-classification-for-direction-of-arrival-estimation-with-convolutional-recurrent-neural-networks-1904.08452</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regression-and-classification-for-direction-of-arrival-estimation-with-convolutional-recurrent-neural-networks-1904.08452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regression-and-classification-for-direction-of-arrival-estimation-with-convolutional-recurrent-neural-networks-1904.08452"/></url>
<url><loc>https://scifaro.com/en/abs/inspecting-and-interacting-with-meaningful-music-representations-using-vae-1904.08842</loc><lastmod>2019-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inspecting-and-interacting-with-meaningful-music-representations-using-vae-1904.08842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inspecting-and-interacting-with-meaningful-music-representations-using-vae-1904.08842"/></url>
<url><loc>https://scifaro.com/en/abs/on-acoustic-modeling-for-broadband-beamforming-1904.08971</loc><lastmod>2019-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-acoustic-modeling-for-broadband-beamforming-1904.08971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-acoustic-modeling-for-broadband-beamforming-1904.08971"/></url>
<url><loc>https://scifaro.com/en/abs/tts-skins-speaker-conversion-via-asr-1904.08983</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-skins-speaker-conversion-via-asr-1904.08983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-skins-speaker-conversion-via-asr-1904.08983"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-environmental-sound-classification-using-a-1d-convolutional-neural-network-1904.08990</loc><lastmod>2019-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-environmental-sound-classification-using-a-1d-convolutional-neural-network-1904.08990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-environmental-sound-classification-using-a-1d-convolutional-neural-network-1904.08990"/></url>
<url><loc>https://scifaro.com/en/abs/hf0-a-hybrid-pitch-extraction-method-for-multimodal-voice-1904.09765</loc><lastmod>2019-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hf0-a-hybrid-pitch-extraction-method-for-multimodal-voice-1904.09765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hf0-a-hybrid-pitch-extraction-method-for-multimodal-voice-1904.09765"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-aligned-frame-mask-based-on-non-stationary-gabor-transform-with-application-to-content-dependent-speaker-comparison-1904.10380</loc><lastmod>2019-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-aligned-frame-mask-based-on-non-stationary-gabor-transform-with-application-to-content-dependent-speaker-comparison-1904.10380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-aligned-frame-mask-based-on-non-stationary-gabor-transform-with-application-to-content-dependent-speaker-comparison-1904.10380"/></url>
<url><loc>https://scifaro.com/en/abs/realizing-petabyte-scale-acoustic-modeling-1904.10584</loc><lastmod>2019-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realizing-petabyte-scale-acoustic-modeling-1904.10584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realizing-petabyte-scale-acoustic-modeling-1904.10584"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-based-on-the-wasserstein-distance-for-acoustic-scene-classification-1904.10678</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-based-on-the-wasserstein-distance-for-acoustic-scene-classification-1904.10678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-based-on-the-wasserstein-distance-for-acoustic-scene-classification-1904.10678"/></url>
<url><loc>https://scifaro.com/en/abs/an-attentional-neural-network-architecture-for-folk-song-classification-1904.11074</loc><lastmod>2019-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attentional-neural-network-architecture-for-folk-song-classification-1904.11074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attentional-neural-network-architecture-for-folk-song-classification-1904.11074"/></url>
<url><loc>https://scifaro.com/en/abs/divide-and-conquer-a-deep-casa-approach-to-talker-independent-monaural-speaker-separation-1904.11148</loc><lastmod>2019-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/divide-and-conquer-a-deep-casa-approach-to-talker-independent-monaural-speaker-separation-1904.11148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/divide-and-conquer-a-deep-casa-approach-to-talker-independent-monaural-speaker-separation-1904.11148"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-sincerity-detection-based-on-covariance-feature-vectors-and-ensemble-methods-1904.11641</loc><lastmod>2019-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-sincerity-detection-based-on-covariance-feature-vectors-and-ensemble-methods-1904.11641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-sincerity-detection-based-on-covariance-feature-vectors-and-ensemble-methods-1904.11641"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-with-sequentially-labelled-data-based-on-connectionist-temporal-classification-and-unsupervised-clustering-1904.12102</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-with-sequentially-labelled-data-based-on-connectionist-temporal-classification-and-unsupervised-clustering-1904.12102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-with-sequentially-labelled-data-based-on-connectionist-temporal-classification-and-unsupervised-clustering-1904.12102"/></url>
<url><loc>https://scifaro.com/en/abs/joint-analysis-of-acoustic-events-and-scenes-based-on-multitask-learning-1904.12146</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-events-and-scenes-based-on-multitask-learning-1904.12146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-events-and-scenes-based-on-multitask-learning-1904.12146"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automation-of-creativity-a-machine-intelligence-approach-1904.12194</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automation-of-creativity-a-machine-intelligence-approach-1904.12194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automation-of-creativity-a-machine-intelligence-approach-1904.12194"/></url>
<url><loc>https://scifaro.com/en/abs/cough-detection-using-hidden-markov-models-1904.12354</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cough-detection-using-hidden-markov-models-1904.12354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cough-detection-using-hidden-markov-models-1904.12354"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-online-automatic-speech-recognition-systems-and-the-nonverbal-responses-to-unintelligible-speech-1904.12403</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-online-automatic-speech-recognition-systems-and-the-nonverbal-responses-to-unintelligible-speech-1904.12403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-online-automatic-speech-recognition-systems-and-the-nonverbal-responses-to-unintelligible-speech-1904.12403"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-speaker-verification-1904.12406</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-speaker-verification-1904.12406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-speaker-verification-1904.12406"/></url>
<url><loc>https://scifaro.com/en/abs/localization-detection-and-tracking-of-multiple-moving-sound-sources-with-a-convolutional-recurrent-neural-network-1904.12769</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-detection-and-tracking-of-multiple-moving-sound-sources-with-a-convolutional-recurrent-neural-network-1904.12769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-detection-and-tracking-of-multiple-moving-sound-sources-with-a-convolutional-recurrent-neural-network-1904.12769"/></url>
<url><loc>https://scifaro.com/en/abs/performing-structured-improvisations-with-pre-trained-deep-learning-models-1904.13285</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performing-structured-improvisations-with-pre-trained-deep-learning-models-1904.13285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performing-structured-improvisations-with-pre-trained-deep-learning-models-1904.13285"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-audio-signal-processing-1905.00078</loc><lastmod>2019-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-audio-signal-processing-1905.00078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-audio-signal-processing-1905.00078"/></url>
<url><loc>https://scifaro.com/en/abs/a-style-transfer-approach-to-source-separation-1905.00151</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-style-transfer-approach-to-source-separation-1905.00151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-style-transfer-approach-to-source-separation-1905.00151"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-detection-and-localization-using-a-two-stage-strategy-1905.00268</loc><lastmod>2019-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-and-localization-using-a-two-stage-strategy-1905.00268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-and-localization-using-a-two-stage-strategy-1905.00268"/></url>
<url><loc>https://scifaro.com/en/abs/a-statistically-principled-and-computationally-efficient-approach-to-speech-enhancement-using-variational-autoencoders-1905.01209</loc><lastmod>2019-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-statistically-principled-and-computationally-efficient-approach-to-speech-enhancement-using-variational-autoencoders-1905.01209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-statistically-principled-and-computationally-efficient-approach-to-speech-enhancement-using-variational-autoencoders-1905.01209"/></url>
<url><loc>https://scifaro.com/en/abs/deep-tensor-factorization-for-spatially-aware-scene-decomposition-1905.01391</loc><lastmod>2019-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-tensor-factorization-for-spatially-aware-scene-decomposition-1905.01391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-tensor-factorization-for-spatially-aware-scene-decomposition-1905.01391"/></url>
<url><loc>https://scifaro.com/en/abs/topology-of-networks-in-generalized-musical-spaces-1905.01842</loc><lastmod>2019-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topology-of-networks-in-generalized-musical-spaces-1905.01842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topology-of-networks-in-generalized-musical-spaces-1905.01842"/></url>
<url><loc>https://scifaro.com/en/abs/learning-with-learned-loss-function-speech-enhancement-with-quality-net-to-improve-perceptual-evaluation-of-speech-quality-1905.01898</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-with-learned-loss-function-speech-enhancement-with-quality-net-to-improve-perceptual-evaluation-of-speech-quality-1905.01898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-with-learned-loss-function-speech-enhancement-with-quality-net-to-improve-perceptual-evaluation-of-speech-quality-1905.01898"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-kernel-shapes-and-skip-connections-for-deep-learning-based-harmonic-percussive-separation-1905.01899</loc><lastmod>2019-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-kernel-shapes-and-skip-connections-for-deep-learning-based-harmonic-percussive-separation-1905.01899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-kernel-shapes-and-skip-connections-for-deep-learning-based-harmonic-percussive-separation-1905.01899"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-representation-of-speech-and-music-1905.03278</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-representation-of-speech-and-music-1905.03278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-representation-of-speech-and-music-1905.03278"/></url>
<url><loc>https://scifaro.com/en/abs/universal-sound-separation-1905.03330</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-sound-separation-1905.03330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-sound-separation-1905.03330"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-deep-clustering-as-preprocessing-for-automatic-speech-recognition-of-sparsely-overlapping-speech-1905.03500</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-deep-clustering-as-preprocessing-for-automatic-speech-recognition-of-sparsely-overlapping-speech-1905.03500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-deep-clustering-as-preprocessing-for-automatic-speech-recognition-of-sparsely-overlapping-speech-1905.03500"/></url>
<url><loc>https://scifaro.com/en/abs/block-online-multi-channel-speech-enhancement-using-dnn-supported-relative-transfer-function-estimates-1905.03632</loc><lastmod>2020-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/block-online-multi-channel-speech-enhancement-using-dnn-supported-relative-transfer-function-estimates-1905.03632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/block-online-multi-channel-speech-enhancement-using-dnn-supported-relative-transfer-function-estimates-1905.03632"/></url>
<url><loc>https://scifaro.com/en/abs/sound-texture-synthesis-using-convolutional-neural-networks-1905.03637</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-texture-synthesis-using-convolutional-neural-networks-1905.03637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-texture-synthesis-using-convolutional-neural-networks-1905.03637"/></url>
<url><loc>https://scifaro.com/en/abs/multiclass-language-identification-using-deep-learning-on-spectral-images-of-audio-signals-1905.04348</loc><lastmod>2019-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiclass-language-identification-using-deep-learning-on-spectral-images-of-audio-signals-1905.04348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiclass-language-identification-using-deep-learning-on-spectral-images-of-audio-signals-1905.04348"/></url>
<url><loc>https://scifaro.com/en/abs/time-contrastive-learning-based-deep-bottleneck-features-for-text-dependent-speaker-verification-1905.04554</loc><lastmod>2019-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-contrastive-learning-based-deep-bottleneck-features-for-text-dependent-speaker-verification-1905.04554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-contrastive-learning-based-deep-bottleneck-features-for-text-dependent-speaker-verification-1905.04554"/></url>
<url><loc>https://scifaro.com/en/abs/metricgan-generative-adversarial-networks-based-black-box-metric-scores-optimization-for-speech-enhancement-1905.04874</loc><lastmod>2019-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metricgan-generative-adversarial-networks-based-black-box-metric-scores-optimization-for-speech-enhancement-1905.04874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metricgan-generative-adversarial-networks-based-black-box-metric-scores-optimization-for-speech-enhancement-1905.04874"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-audio-spatialization-with-correspondence-classifier-1905.05375</loc><lastmod>2019-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-audio-spatialization-with-correspondence-classifier-1905.05375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-audio-spatialization-with-correspondence-classifier-1905.05375"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-groove-with-inverse-sequence-transformations-1905.06118</loc><lastmod>2019-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-groove-with-inverse-sequence-transformations-1905.06118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-groove-with-inverse-sequence-transformations-1905.06118"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-channel-speech-separation-1905.06286</loc><lastmod>2019-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-channel-speech-separation-1905.06286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-channel-speech-separation-1905.06286"/></url>
<url><loc>https://scifaro.com/en/abs/multi-web-audio-sequencer-collaborative-music-making-1905.06717</loc><lastmod>2019-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-web-audio-sequencer-collaborative-music-making-1905.06717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-web-audio-sequencer-collaborative-music-making-1905.06717"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-study-of-speech-separation-spectrogram-vs-waveform-separation-1905.07497</loc><lastmod>2019-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-study-of-speech-separation-spectrogram-vs-waveform-separation-1905.07497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-study-of-speech-separation-spectrogram-vs-waveform-separation-1905.07497"/></url>
<url><loc>https://scifaro.com/en/abs/independent-vector-analysis-with-more-microphones-than-sources-1905.07880</loc><lastmod>2019-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-vector-analysis-with-more-microphones-than-sources-1905.07880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-vector-analysis-with-more-microphones-than-sources-1905.07880"/></url>
<url><loc>https://scifaro.com/en/abs/dance-hit-song-prediction-1905.08076</loc><lastmod>2019-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dance-hit-song-prediction-1905.08076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dance-hit-song-prediction-1905.08076"/></url>
<url><loc>https://scifaro.com/en/abs/robust-sound-event-detection-in-bioacoustic-sensor-networks-1905.08352</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-sound-event-detection-in-bioacoustic-sensor-networks-1905.08352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-sound-event-detection-in-bioacoustic-sensor-networks-1905.08352"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-room-reverberant-dataset-for-sound-event-localization-and-detection-1905.08546</loc><lastmod>2019-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-room-reverberant-dataset-for-sound-event-localization-and-detection-1905.08546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-room-reverberant-dataset-for-sound-event-localization-and-detection-1905.08546"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-pitch-tracking-based-on-the-harmonic-model-1905.08557</loc><lastmod>2019-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-pitch-tracking-based-on-the-harmonic-model-1905.08557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-pitch-tracking-based-on-the-harmonic-model-1905.08557"/></url>
<url><loc>https://scifaro.com/en/abs/une-ou-deux-composantes-la-r-eponse-de-la-diffusion-en-ondelettes-1905.08601</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/une-ou-deux-composantes-la-r-eponse-de-la-diffusion-en-ondelettes-1905.08601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/une-ou-deux-composantes-la-r-eponse-de-la-diffusion-en-ondelettes-1905.08601"/></url>
<url><loc>https://scifaro.com/en/abs/specialized-decision-surface-and-disentangled-feature-for-weakly-supervised-polyphonic-sound-event-detection-1905.10091</loc><lastmod>2020-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specialized-decision-surface-and-disentangled-feature-for-weakly-supervised-polyphonic-sound-event-detection-1905.10091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specialized-decision-surface-and-disentangled-feature-for-weakly-supervised-polyphonic-sound-event-detection-1905.10091"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-faces-from-voices-1905.10604</loc><lastmod>2019-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-faces-from-voices-1905.10604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-faces-from-voices-1905.10604"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-separation-of-a-conversation-from-background-via-attentional-gating-1905.10751</loc><lastmod>2019-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-separation-of-a-conversation-from-background-via-attentional-gating-1905.10751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-separation-of-a-conversation-from-background-via-attentional-gating-1905.10751"/></url>
<url><loc>https://scifaro.com/en/abs/et-gan-cross-language-emotion-transfer-based-on-cycle-consistent-generative-adversarial-networks-1905.11173</loc><lastmod>2020-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/et-gan-cross-language-emotion-transfer-based-on-cycle-consistent-generative-adversarial-networks-1905.11173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/et-gan-cross-language-emotion-transfer-based-on-cycle-consistent-generative-adversarial-networks-1905.11173"/></url>
<url><loc>https://scifaro.com/en/abs/demonstration-of-performancenet-a-convolutional-neural-network-model-for-score-to-audio-music-generation-1905.11689</loc><lastmod>2019-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/demonstration-of-performancenet-a-convolutional-neural-network-model-for-score-to-audio-music-generation-1905.11689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/demonstration-of-performancenet-a-convolutional-neural-network-model-for-score-to-audio-music-generation-1905.11689"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-based-cover-song-detection-1905.11700</loc><lastmod>2019-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-based-cover-song-detection-1905.11700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-based-cover-song-detection-1905.11700"/></url>
<url><loc>https://scifaro.com/en/abs/two-level-explanations-in-music-emotion-recognition-1905.11760</loc><lastmod>2019-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-level-explanations-in-music-emotion-recognition-1905.11760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-level-explanations-in-music-emotion-recognition-1905.11760"/></url>
<url><loc>https://scifaro.com/en/abs/texture-selection-for-automatic-music-genre-classification-1905.11959</loc><lastmod>2020-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/texture-selection-for-automatic-music-genre-classification-1905.11959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/texture-selection-for-automatic-music-genre-classification-1905.11959"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-definition-of-the-distortion-matrix-for-an-audio-to-score-alignment-system-1905.12324</loc><lastmod>2019-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-definition-of-the-distortion-matrix-for-an-audio-to-score-alignment-system-1905.12324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-definition-of-the-distortion-matrix-for-an-audio-to-score-alignment-system-1905.12324"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-audio-spoofing-detection-a-detailed-comparison-of-traditional-and-learned-features-1905.12439</loc><lastmod>2019-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-audio-spoofing-detection-a-detailed-comparison-of-traditional-and-learned-features-1905.12439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-audio-spoofing-detection-a-detailed-comparison-of-traditional-and-learned-features-1905.12439"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-multilabel-system-for-automatic-music-emotion-recognition-1905.12629</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-multilabel-system-for-automatic-music-emotion-recognition-1905.12629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-multilabel-system-for-automatic-music-emotion-recognition-1905.12629"/></url>
<url><loc>https://scifaro.com/en/abs/a-music-classification-model-based-on-metric-learning-and-feature-extraction-from-mp3-audio-files-1905.12804</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-music-classification-model-based-on-metric-learning-and-feature-extraction-from-mp3-audio-files-1905.12804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-music-classification-model-based-on-metric-learning-and-feature-extraction-from-mp3-audio-files-1905.12804"/></url>
<url><loc>https://scifaro.com/en/abs/audio-caption-in-a-car-setting-with-a-sentence-level-loss-1905.13448</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-caption-in-a-car-setting-with-a-sentence-level-loss-1905.13448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-caption-in-a-car-setting-with-a-sentence-level-loss-1905.13448"/></url>
<url><loc>https://scifaro.com/en/abs/problem-agnostic-speech-embeddings-for-multi-speaker-text-to-speech-with-samplernn-1906.00733</loc><lastmod>2019-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/problem-agnostic-speech-embeddings-for-multi-speaker-text-to-speech-with-samplernn-1906.00733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/problem-agnostic-speech-embeddings-for-multi-speaker-text-to-speech-with-samplernn-1906.00733"/></url>
<url><loc>https://scifaro.com/en/abs/a-surprising-density-of-illusionable-natural-speech-1906.01040</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-surprising-density-of-illusionable-natural-speech-1906.01040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-surprising-density-of-illusionable-natural-speech-1906.01040"/></url>
<url><loc>https://scifaro.com/en/abs/dilated-convolution-with-dilated-gru-for-music-source-separation-1906.01203</loc><lastmod>2019-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dilated-convolution-with-dilated-gru-for-music-source-separation-1906.01203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dilated-convolution-with-dilated-gru-for-music-source-separation-1906.01203"/></url>
<url><loc>https://scifaro.com/en/abs/musicntwrk-data-tools-for-music-theory-analysis-and-composition-1906.01453</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicntwrk-data-tools-for-music-theory-analysis-and-composition-1906.01453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicntwrk-data-tools-for-music-theory-analysis-and-composition-1906.01453"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-full-rank-spatial-covariance-estimation-using-independent-low-rank-matrix-analysis-for-blind-source-separation-1906.02482</loc><lastmod>2019-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-full-rank-spatial-covariance-estimation-using-independent-low-rank-matrix-analysis-for-blind-source-separation-1906.02482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-full-rank-spatial-covariance-estimation-using-independent-low-rank-matrix-analysis-for-blind-source-separation-1906.02482"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-separation-a-study-on-training-data-1906.02618</loc><lastmod>2019-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-separation-a-study-on-training-data-1906.02618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-separation-a-study-on-training-data-1906.02618"/></url>
<url><loc>https://scifaro.com/en/abs/audio-tagging-with-noisy-labels-and-minimal-supervision-1906.02975</loc><lastmod>2020-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-tagging-with-noisy-labels-and-minimal-supervision-1906.02975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-tagging-with-noisy-labels-and-minimal-supervision-1906.02975"/></url>
<url><loc>https://scifaro.com/en/abs/rvad-an-unsupervised-segment-based-robust-voice-activity-detection-method-1906.03588</loc><lastmod>2022-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rvad-an-unsupervised-segment-based-robust-voice-activity-detection-method-1906.03588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rvad-an-unsupervised-segment-based-robust-voice-activity-detection-method-1906.03588"/></url>
<url><loc>https://scifaro.com/en/abs/deep-music-analogy-via-latent-representation-disentanglement-1906.03626</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-music-analogy-via-latent-representation-disentanglement-1906.03626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-music-analogy-via-latent-representation-disentanglement-1906.03626"/></url>
<url><loc>https://scifaro.com/en/abs/deep-unsupervised-drum-transcription-1906.03697</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-unsupervised-drum-transcription-1906.03697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-unsupervised-drum-transcription-1906.03697"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-depth-analysis-with-different-channel-inputs-for-acoustic-scene-classification-1906.04591</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-depth-analysis-with-different-channel-inputs-for-acoustic-scene-classification-1906.04591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-depth-analysis-with-different-channel-inputs-for-acoustic-scene-classification-1906.04591"/></url>
<url><loc>https://scifaro.com/en/abs/toward-interpretable-music-tagging-with-self-attention-1906.04972</loc><lastmod>2019-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-interpretable-music-tagging-with-self-attention-1906.04972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-interpretable-music-tagging-with-self-attention-1906.04972"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-in-the-life-authorship-attribution-of-lennon-mccartney-songs-1906.05427</loc><lastmod>2019-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-in-the-life-authorship-attribution-of-lennon-mccartney-songs-1906.05427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-in-the-life-authorship-attribution-of-lennon-mccartney-songs-1906.05427"/></url>
<url><loc>https://scifaro.com/en/abs/cross-cultural-data-shows-musical-scales-evolved-to-maximise-imperfect-fifths-1906.06171</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-cultural-data-shows-musical-scales-evolved-to-maximise-imperfect-fifths-1906.06171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-cultural-data-shows-musical-scales-evolved-to-maximise-imperfect-fifths-1906.06171"/></url>
<url><loc>https://scifaro.com/en/abs/user-curated-shaping-of-expressive-performances-1906.06428</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/user-curated-shaping-of-expressive-performances-1906.06428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/user-curated-shaping-of-expressive-performances-1906.06428"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-consonance-and-its-relationships-with-temperament-harmony-and-electronic-amplification-1906.06559</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-consonance-and-its-relationships-with-temperament-harmony-and-electronic-amplification-1906.06559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-consonance-and-its-relationships-with-temperament-harmony-and-electronic-amplification-1906.06559"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-approach-to-real-time-impulsive-sound-detection-for-surveillance-applications-1906.06586</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-approach-to-real-time-impulsive-sound-detection-for-surveillance-applications-1906.06586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-approach-to-real-time-impulsive-sound-detection-for-surveillance-applications-1906.06586"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-embedded-cnn-for-music-tagging-mse-cnn-1906.06746</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-embedded-cnn-for-music-tagging-mse-cnn-1906.06746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-embedded-cnn-for-music-tagging-mse-cnn-1906.06746"/></url>
<url><loc>https://scifaro.com/en/abs/parametric-resynthesis-with-neural-vocoders-1906.06762</loc><lastmod>2019-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parametric-resynthesis-with-neural-vocoders-1906.06762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parametric-resynthesis-with-neural-vocoders-1906.06762"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-music-modality-with-a-key-class-invariant-pitch-chroma-cnn-1906.07145</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-music-modality-with-a-key-class-invariant-pitch-chroma-cnn-1906.07145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-music-modality-with-a-key-class-invariant-pitch-chroma-cnn-1906.07145"/></url>
<url><loc>https://scifaro.com/en/abs/a-monaural-speech-enhancement-method-for-robust-small-footprint-keyword-spotting-1906.08415</loc><lastmod>2019-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-monaural-speech-enhancement-method-for-robust-small-footprint-keyword-spotting-1906.08415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-monaural-speech-enhancement-method-for-robust-small-footprint-keyword-spotting-1906.08415"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-learning-for-improved-onsets-and-frames-music-transcription-1906.08512</loc><lastmod>2019-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-learning-for-improved-onsets-and-frames-music-transcription-1906.08512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-learning-for-improved-onsets-and-frames-music-transcription-1906.08512"/></url>
<url><loc>https://scifaro.com/en/abs/learning-discriminative-features-using-center-loss-and-reconstruction-as-regularizer-for-speech-emotion-recognition-1906.08873</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-discriminative-features-using-center-loss-and-reconstruction-as-regularizer-for-speech-emotion-recognition-1906.08873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-discriminative-features-using-center-loss-and-reconstruction-as-regularizer-for-speech-emotion-recognition-1906.08873"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-and-classifying-cultural-music-using-melodic-features-case-of-hindustani-carnatic-and-turkish-music-1906.08916</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-and-classifying-cultural-music-using-melodic-features-case-of-hindustani-carnatic-and-turkish-music-1906.08916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-and-classifying-cultural-music-using-melodic-features-case-of-hindustani-carnatic-and-turkish-music-1906.08916"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-using-deep-autoregressive-neural-networks-for-acoustic-modeling-1906.08977</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-using-deep-autoregressive-neural-networks-for-acoustic-modeling-1906.08977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-using-deep-autoregressive-neural-networks-for-acoustic-modeling-1906.08977"/></url>
<url><loc>https://scifaro.com/en/abs/query-based-deep-improvisation-1906.09155</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/query-based-deep-improvisation-1906.09155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/query-based-deep-improvisation-1906.09155"/></url>
<url><loc>https://scifaro.com/en/abs/deep-polyphonic-adsr-piano-note-transcription-1906.09165</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-polyphonic-adsr-piano-note-transcription-1906.09165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-polyphonic-adsr-piano-note-transcription-1906.09165"/></url>
<url><loc>https://scifaro.com/en/abs/the-shape-of-remixxxes-to-come-audio-texture-synthesis-with-time-frequency-scattering-1906.09334</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-shape-of-remixxxes-to-come-audio-texture-synthesis-with-time-frequency-scattering-1906.09334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-shape-of-remixxxes-to-come-audio-texture-synthesis-with-time-frequency-scattering-1906.09334"/></url>
<url><loc>https://scifaro.com/en/abs/keyword-spotting-for-hearing-assistive-devices-robust-to-external-speakers-1906.09417</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keyword-spotting-for-hearing-assistive-devices-robust-to-external-speakers-1906.09417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keyword-spotting-for-hearing-assistive-devices-robust-to-external-speakers-1906.09417"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-vocoder-with-hierarchical-generation-of-amplitude-and-phase-spectra-for-statistical-parametric-speech-synthesis-1906.09573</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-vocoder-with-hierarchical-generation-of-amplitude-and-phase-spectra-for-statistical-parametric-speech-synthesis-1906.09573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-vocoder-with-hierarchical-generation-of-amplitude-and-phase-spectra-for-statistical-parametric-speech-synthesis-1906.09573"/></url>
<url><loc>https://scifaro.com/en/abs/ultrasound-based-silent-speech-interface-built-on-a-continuous-vocoder-1906.09885</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultrasound-based-silent-speech-interface-built-on-a-continuous-vocoder-1906.09885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultrasound-based-silent-speech-interface-built-on-a-continuous-vocoder-1906.09885"/></url>
<url><loc>https://scifaro.com/en/abs/self-multi-head-attention-for-speaker-recognition-1906.09890</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-multi-head-attention-for-speaker-recognition-1906.09890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-multi-head-attention-for-speaker-recognition-1906.09890"/></url>
<url><loc>https://scifaro.com/en/abs/classical-music-prediction-and-composition-by-means-of-variational-autoencoders-1906.09972</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classical-music-prediction-and-composition-by-means-of-variational-autoencoders-1906.09972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classical-music-prediction-and-composition-by-means-of-variational-autoencoders-1906.09972"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-separation-with-auxiliary-speaker-embeddings-1906.09997</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-separation-with-auxiliary-speaker-embeddings-1906.09997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-separation-with-auxiliary-speaker-embeddings-1906.09997"/></url>
<url><loc>https://scifaro.com/en/abs/who-said-that-audio-visual-speaker-diarisation-of-real-world-meetings-1906.10042</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-said-that-audio-visual-speaker-diarisation-of-real-world-meetings-1906.10042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-said-that-audio-visual-speaker-diarisation-of-real-world-meetings-1906.10042"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-approach-to-melody-line-identification-in-symbolic-scores-1906.10547</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-approach-to-melody-line-identification-in-symbolic-scores-1906.10547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-approach-to-melody-line-identification-in-symbolic-scores-1906.10547"/></url>
<url><loc>https://scifaro.com/en/abs/naver-at-activitynet-challenge-2019-task-b-active-speaker-detection-ava-1906.10555</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naver-at-activitynet-challenge-2019-task-b-active-speaker-detection-ava-1906.10555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naver-at-activitynet-challenge-2019-task-b-active-speaker-detection-ava-1906.10555"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-performance-of-residual-block-design-alternatives-in-convolutional-neural-networks-for-end-to-end-audio-classification-1906.10891</loc><lastmod>2019-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-performance-of-residual-block-design-alternatives-in-convolutional-neural-networks-for-end-to-end-audio-classification-1906.10891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-performance-of-residual-block-design-alternatives-in-convolutional-neural-networks-for-end-to-end-audio-classification-1906.10891"/></url>
<url><loc>https://scifaro.com/en/abs/learning-a-joint-embedding-space-of-monophonic-and-mixed-music-signals-for-singing-voice-1906.11139</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-a-joint-embedding-space-of-monophonic-and-mixed-music-signals-for-singing-voice-1906.11139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-a-joint-embedding-space-of-monophonic-and-mixed-music-signals-for-singing-voice-1906.11139"/></url>
<url><loc>https://scifaro.com/en/abs/wham-extending-speech-separation-to-noisy-environments-1907.01160</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wham-extending-speech-separation-to-noisy-environments-1907.01160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wham-extending-speech-separation-to-noisy-environments-1907.01160"/></url>
<url><loc>https://scifaro.com/en/abs/can-a-robot-hear-the-shape-and-dimensions-of-a-room-1907.01169</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-a-robot-hear-the-shape-and-dimensions-of-a-room-1907.01169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-a-robot-hear-the-shape-and-dimensions-of-a-room-1907.01169"/></url>
<url><loc>https://scifaro.com/en/abs/kite-automatic-speech-recognition-for-unmanned-aerial-vehicles-1907.01195</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kite-automatic-speech-recognition-for-unmanned-aerial-vehicles-1907.01195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kite-automatic-speech-recognition-for-unmanned-aerial-vehicles-1907.01195"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-classifiers-for-audio-impairments-with-noisy-labels-1907.01742</loc><lastmod>2019-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-classifiers-for-audio-impairments-with-noisy-labels-1907.01742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-classifiers-for-audio-impairments-with-noisy-labels-1907.01742"/></url>
<url><loc>https://scifaro.com/en/abs/a-case-study-of-deep-learned-activations-via-hand-crafted-audio-features-1907.01813</loc><lastmod>2019-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-case-study-of-deep-learned-activations-via-hand-crafted-audio-features-1907.01813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-case-study-of-deep-learned-activations-via-hand-crafted-audio-features-1907.01813"/></url>
<url><loc>https://scifaro.com/en/abs/cover-detection-using-dominant-melody-embeddings-1907.01824</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cover-detection-using-dominant-melody-embeddings-1907.01824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cover-detection-using-dominant-melody-embeddings-1907.01824"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-convolutional-recurrent-neural-network-for-environmental-sound-classification-1907.02230</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-convolutional-recurrent-neural-network-for-environmental-sound-classification-1907.02230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-convolutional-recurrent-neural-network-for-environmental-sound-classification-1907.02230"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-symbolic-music-style-translation-using-synthetic-data-1907.02265</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-symbolic-music-style-translation-using-synthetic-data-1907.02265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-symbolic-music-style-translation-using-synthetic-data-1907.02265"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-network-based-speech-enhancement-for-cochlear-implant-recipients-1907.02526</loc><lastmod>2019-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-network-based-speech-enhancement-for-cochlear-implant-recipients-1907.02526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-network-based-speech-enhancement-for-cochlear-implant-recipients-1907.02526"/></url>
<url><loc>https://scifaro.com/en/abs/neural-drum-machine-an-interactive-system-for-real-time-synthesis-of-drum-sounds-1907.02637</loc><lastmod>2019-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-drum-machine-an-interactive-system-for-real-time-synthesis-of-drum-sounds-1907.02637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-drum-machine-an-interactive-system-for-real-time-synthesis-of-drum-sounds-1907.02637"/></url>
<url><loc>https://scifaro.com/en/abs/a-bi-directional-transformer-for-musical-chord-recognition-1907.02698</loc><lastmod>2019-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bi-directional-transformer-for-musical-chord-recognition-1907.02698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bi-directional-transformer-for-musical-chord-recognition-1907.02698"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-baselines-for-computational-paralinguistics-1907.02864</loc><lastmod>2020-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-baselines-for-computational-paralinguistics-1907.02864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-baselines-for-computational-paralinguistics-1907.02864"/></url>
<url><loc>https://scifaro.com/en/abs/towards-explainable-music-emotion-recognition-the-route-via-mid-level-features-1907.03572</loc><lastmod>2019-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-explainable-music-emotion-recognition-the-route-via-mid-level-features-1907.03572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-explainable-music-emotion-recognition-the-route-via-mid-level-features-1907.03572"/></url>
<url><loc>https://scifaro.com/en/abs/improving-reverberant-speech-training-using-diffuse-acoustic-simulation-1907.03988</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-reverberant-speech-training-using-diffuse-acoustic-simulation-1907.03988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-reverberant-speech-training-using-diffuse-acoustic-simulation-1907.03988"/></url>
<url><loc>https://scifaro.com/en/abs/evolution-of-the-informational-complexity-of-contemporary-western-music-1907.04292</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolution-of-the-informational-complexity-of-contemporary-western-music-1907.04292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolution-of-the-informational-complexity-of-contemporary-western-music-1907.04292"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-conditioning-for-generative-music-systems-with-human-interpretable-controls-1907.04352</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-conditioning-for-generative-music-systems-with-human-interpretable-controls-1907.04352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-conditioning-for-generative-music-systems-with-human-interpretable-controls-1907.04352"/></url>
<url><loc>https://scifaro.com/en/abs/lakhnes-improving-multi-instrumental-music-generation-with-cross-domain-pre-training-1907.04868</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lakhnes-improving-multi-instrumental-music-generation-with-cross-domain-pre-training-1907.04868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lakhnes-improving-multi-instrumental-music-generation-with-cross-domain-pre-training-1907.04868"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-loss-function-for-supervised-speech-source-separation-by-mask-based-beamforming-1907.04984</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-loss-function-for-supervised-speech-source-separation-by-mask-based-beamforming-1907.04984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-loss-function-for-supervised-speech-source-separation-by-mask-based-beamforming-1907.04984"/></url>
<url><loc>https://scifaro.com/en/abs/explicitly-conditioned-melody-generation-a-case-study-with-interdependent-rnns-1907.05208</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explicitly-conditioned-melody-generation-a-case-study-with-interdependent-rnns-1907.05208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explicitly-conditioned-melody-generation-a-case-study-with-interdependent-rnns-1907.05208"/></url>
<url><loc>https://scifaro.com/en/abs/toeplitz-inverse-covariance-based-robust-speaker-clustering-for-naturalistic-audio-streams-1907.05584</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toeplitz-inverse-covariance-based-robust-speaker-clustering-for-naturalistic-audio-streams-1907.05584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toeplitz-inverse-covariance-based-robust-speaker-clustering-for-naturalistic-audio-streams-1907.05584"/></url>
<url><loc>https://scifaro.com/en/abs/learning-complex-basis-functions-for-invariant-representations-of-audio-1907.05982</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-complex-basis-functions-for-invariant-representations-of-audio-1907.05982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-complex-basis-functions-for-invariant-representations-of-audio-1907.05982"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-semi-supervised-adversarial-autoencoding-for-speech-emotion-recognition-1907.06078</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-semi-supervised-adversarial-autoencoding-for-speech-emotion-recognition-1907.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-semi-supervised-adversarial-autoencoding-for-speech-emotion-recognition-1907.06078"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-cross-lingual-speech-emotion-recognition-1907.06083</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-cross-lingual-speech-emotion-recognition-1907.06083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-cross-lingual-speech-emotion-recognition-1907.06083"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-voice-pathology-detection-1907.06129</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-voice-pathology-detection-1907.06129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-voice-pathology-detection-1907.06129"/></url>
<url><loc>https://scifaro.com/en/abs/the-bach-doodle-approachable-music-composition-with-machine-learning-at-scale-1907.06637</loc><lastmod>2019-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-bach-doodle-approachable-music-composition-with-machine-learning-at-scale-1907.06637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-bach-doodle-approachable-music-composition-with-machine-learning-at-scale-1907.06637"/></url>
<url><loc>https://scifaro.com/en/abs/hodgepodge-sound-event-detection-based-on-ensemble-of-semi-supervised-learning-methods-1907.07398</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hodgepodge-sound-event-detection-based-on-ensemble-of-semi-supervised-learning-methods-1907.07398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hodgepodge-sound-event-detection-based-on-ensemble-of-semi-supervised-learning-methods-1907.07398"/></url>
<url><loc>https://scifaro.com/en/abs/language-modelling-for-sound-event-detection-with-teacher-forcing-and-scheduled-sampling-1907.08506</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-modelling-for-sound-event-detection-with-teacher-forcing-and-scheduled-sampling-1907.08506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-modelling-for-sound-event-detection-with-teacher-forcing-and-scheduled-sampling-1907.08506"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-instrument-classification-robust-to-audio-effects-1907.08520</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-instrument-classification-robust-to-audio-effects-1907.08520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-instrument-classification-robust-to-audio-effects-1907.08520"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-knowledge-bases-and-parallel-annotations-for-music-genre-translation-1907.08698</loc><lastmod>2019-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-knowledge-bases-and-parallel-annotations-for-music-genre-translation-1907.08698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-knowledge-bases-and-parallel-annotations-for-music-genre-translation-1907.08698"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourcing-a-dataset-of-audio-captions-1907.09238</loc><lastmod>2019-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourcing-a-dataset-of-audio-captions-1907.09238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourcing-a-dataset-of-audio-captions-1907.09238"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-learning-for-monaural-speech-separation-using-deep-embedding-features-1907.09884</loc><lastmod>2019-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-learning-for-monaural-speech-separation-using-deep-embedding-features-1907.09884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-learning-for-monaural-speech-separation-using-deep-embedding-features-1907.09884"/></url>
<url><loc>https://scifaro.com/en/abs/log-complex-color-for-visual-pattern-recognition-of-total-sound-1907.09936</loc><lastmod>2019-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/log-complex-color-for-visual-pattern-recognition-of-total-sound-1907.09936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/log-complex-color-for-visual-pattern-recognition-of-total-sound-1907.09936"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-lungs-auscultation-with-reinforcement-learning-agent-1907.11238</loc><lastmod>2019-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-lungs-auscultation-with-reinforcement-learning-agent-1907.11238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-lungs-auscultation-with-reinforcement-learning-agent-1907.11238"/></url>
<url><loc>https://scifaro.com/en/abs/dilated-fcn-listening-longer-to-hear-better-1907.11956</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dilated-fcn-listening-longer-to-hear-better-1907.11956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dilated-fcn-listening-longer-to-hear-better-1907.11956"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-vc2-rethinking-conditional-methods-for-stargan-based-voice-conversion-1907.12279</loc><lastmod>2019-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-vc2-rethinking-conditional-methods-for-stargan-based-voice-conversion-1907.12279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-vc2-rethinking-conditional-methods-for-stargan-based-voice-conversion-1907.12279"/></url>
<url><loc>https://scifaro.com/en/abs/marine-mammal-species-classification-using-convolutional-neural-networks-and-a-novel-acoustic-representation-1907.13188</loc><lastmod>2019-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/marine-mammal-species-classification-using-convolutional-neural-networks-and-a-novel-acoustic-representation-1907.13188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/marine-mammal-species-classification-using-convolutional-neural-networks-and-a-novel-acoustic-representation-1907.13188"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-cochlear-implant-users-ability-for-speaker-identification-using-ci-auditory-stimuli-1908.00031</loc><lastmod>2019-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-cochlear-implant-users-ability-for-speaker-identification-using-ci-auditory-stimuli-1908.00031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-cochlear-implant-users-ability-for-speaker-identification-using-ci-auditory-stimuli-1908.00031"/></url>
<url><loc>https://scifaro.com/en/abs/high-level-control-of-drum-track-generation-using-learned-patterns-of-rhythmic-interaction-1908.00948</loc><lastmod>2019-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-level-control-of-drum-track-generation-using-learned-patterns-of-rhythmic-interaction-1908.00948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-level-control-of-drum-track-generation-using-learned-patterns-of-rhythmic-interaction-1908.00948"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-based-music-generation-system-1908.01080</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-based-music-generation-system-1908.01080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-based-music-generation-system-1908.01080"/></url>
<url><loc>https://scifaro.com/en/abs/v2s-attack-building-dnn-based-voice-conversion-from-automatic-speaker-verification-1908.01454</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/v2s-attack-building-dnn-based-voice-conversion-from-automatic-speaker-verification-1908.01454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/v2s-attack-building-dnn-based-voice-conversion-from-automatic-speaker-verification-1908.01454"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-sounds-for-wellbeing-a-novel-dataset-and-baseline-results-1908.01671</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-sounds-for-wellbeing-a-novel-dataset-and-baseline-results-1908.01671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-sounds-for-wellbeing-a-novel-dataset-and-baseline-results-1908.01671"/></url>
<url><loc>https://scifaro.com/en/abs/adversarially-trained-end-to-end-korean-singing-voice-synthesis-system-1908.01919</loc><lastmod>2019-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarially-trained-end-to-end-korean-singing-voice-synthesis-system-1908.01919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarially-trained-end-to-end-korean-singing-voice-synthesis-system-1908.01919"/></url>
<url><loc>https://scifaro.com/en/abs/acceleration-of-rank-constrained-spatial-covariance-matrix-estimation-for-blind-speech-extraction-1908.01964</loc><lastmod>2019-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acceleration-of-rank-constrained-spatial-covariance-matrix-estimation-for-blind-speech-extraction-1908.01964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acceleration-of-rank-constrained-spatial-covariance-matrix-estimation-for-blind-speech-extraction-1908.01964"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-conditional-variational-auto-encoders-1908.02590</loc><lastmod>2020-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-conditional-variational-auto-encoders-1908.02590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-using-conditional-variational-auto-encoders-1908.02590"/></url>
<url><loc>https://scifaro.com/en/abs/viterbi-extraction-tutorial-with-hidden-markov-toolkit-1908.03143</loc><lastmod>2019-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/viterbi-extraction-tutorial-with-hidden-markov-toolkit-1908.03143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/viterbi-extraction-tutorial-with-hidden-markov-toolkit-1908.03143"/></url>
<url><loc>https://scifaro.com/en/abs/interleaved-multitask-learning-for-audio-source-separation-with-independent-databases-1908.05182</loc><lastmod>2019-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interleaved-multitask-learning-for-audio-source-separation-with-independent-databases-1908.05182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interleaved-multitask-learning-for-audio-source-separation-with-independent-databases-1908.05182"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-using-simple-temporal-features-and-pitch-synchronous-cepstral-coefficients-1908.05553</loc><lastmod>2019-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-using-simple-temporal-features-and-pitch-synchronous-cepstral-coefficients-1908.05553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-using-simple-temporal-features-and-pitch-synchronous-cepstral-coefficients-1908.05553"/></url>
<url><loc>https://scifaro.com/en/abs/sub-spectrogram-segmentation-for-environmental-sound-classification-via-convolutional-recurrent-neural-network-and-score-level-fusion-1908.05863</loc><lastmod>2019-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-spectrogram-segmentation-for-environmental-sound-classification-via-convolutional-recurrent-neural-network-and-score-level-fusion-1908.05863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-spectrogram-segmentation-for-environmental-sound-classification-via-convolutional-recurrent-neural-network-and-score-level-fusion-1908.05863"/></url>
<url><loc>https://scifaro.com/en/abs/jvs-corpus-free-japanese-multi-speaker-voice-corpus-1908.06248</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jvs-corpus-free-japanese-multi-speaker-voice-corpus-1908.06248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jvs-corpus-free-japanese-multi-speaker-voice-corpus-1908.06248"/></url>
<url><loc>https://scifaro.com/en/abs/a-dual-staged-context-aggregation-method-towards-efficient-end-to-end-speech-enhancement-1908.06468</loc><lastmod>2020-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dual-staged-context-aggregation-method-towards-efficient-end-to-end-speech-enhancement-1908.06468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dual-staged-context-aggregation-method-towards-efficient-end-to-end-speech-enhancement-1908.06468"/></url>
<url><loc>https://scifaro.com/en/abs/audio-query-based-music-source-separation-1908.06593</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-query-based-music-source-separation-1908.06593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-query-based-music-source-separation-1908.06593"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generating-ambisonics-using-audio-visual-cue-for-virtual-reality-1908.06752</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generating-ambisonics-using-audio-visual-cue-for-virtual-reality-1908.06752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generating-ambisonics-using-audio-visual-cue-for-virtual-reality-1908.06752"/></url>
<url><loc>https://scifaro.com/en/abs/musical-rhythm-transcription-based-on-bayesian-piece-specific-score-models-capturing-repetitions-1908.06969</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-rhythm-transcription-based-on-bayesian-piece-specific-score-models-capturing-repetitions-1908.06969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-rhythm-transcription-based-on-bayesian-piece-specific-score-models-capturing-repetitions-1908.06969"/></url>
<url><loc>https://scifaro.com/en/abs/a-microphone-array-and-voice-algorithm-based-smart-hearing-aid-1908.07324</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-microphone-array-and-voice-algorithm-based-smart-hearing-aid-1908.07324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-microphone-array-and-voice-algorithm-based-smart-hearing-aid-1908.07324"/></url>
<url><loc>https://scifaro.com/en/abs/ai-for-earth-rainforest-conservation-by-acoustic-surveillance-1908.07517</loc><lastmod>2019-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-for-earth-rainforest-conservation-by-acoustic-surveillance-1908.07517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-for-earth-rainforest-conservation-by-acoustic-surveillance-1908.07517"/></url>
<url><loc>https://scifaro.com/en/abs/coarse-to-fine-optimization-for-speech-enhancement-1908.08044</loc><lastmod>2019-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coarse-to-fine-optimization-for-speech-enhancement-1908.08044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coarse-to-fine-optimization-for-speech-enhancement-1908.08044"/></url>
<url><loc>https://scifaro.com/en/abs/sound-localization-and-separation-in-three-dimensional-space-using-a-single-microphone-with-a-metamaterial-enclosure-1908.08160</loc><lastmod>2019-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-localization-and-separation-in-three-dimensional-space-using-a-single-microphone-with-a-metamaterial-enclosure-1908.08160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-localization-and-separation-in-three-dimensional-space-using-a-single-microphone-with-a-metamaterial-enclosure-1908.08160"/></url>
<url><loc>https://scifaro.com/en/abs/improving-automatic-jazz-melody-generation-by-transfer-learning-techniques-1908.09484</loc><lastmod>2019-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-automatic-jazz-melody-generation-by-transfer-learning-techniques-1908.09484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-automatic-jazz-melody-generation-by-transfer-learning-techniques-1908.09484"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-tasks-and-investigation-of-subjective-evaluation-methods-in-environmental-sound-synthesis-and-conversion-1908.10055</loc><lastmod>2019-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-tasks-and-investigation-of-subjective-evaluation-methods-in-environmental-sound-synthesis-and-conversion-1908.10055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-tasks-and-investigation-of-subjective-evaluation-methods-in-environmental-sound-synthesis-and-conversion-1908.10055"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-parametric-deep-learning-approach-for-sound-event-localization-and-detection-1908.10133</loc><lastmod>2019-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-parametric-deep-learning-approach-for-sound-event-localization-and-detection-1908.10133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-parametric-deep-learning-approach-for-sound-event-localization-and-detection-1908.10133"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-recurrent-neural-network-based-progressive-learning-for-monaural-speech-enhancement-1908.10768</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-network-based-progressive-learning-for-monaural-speech-enhancement-1908.10768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-network-based-progressive-learning-for-monaural-speech-enhancement-1908.10768"/></url>
<url><loc>https://scifaro.com/en/abs/environment-sound-classification-using-multiple-feature-channels-and-attention-based-deep-convolutional-neural-network-1908.11219</loc><lastmod>2020-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environment-sound-classification-using-multiple-feature-channels-and-attention-based-deep-convolutional-neural-network-1908.11219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environment-sound-classification-using-multiple-feature-channels-and-attention-based-deep-convolutional-neural-network-1908.11219"/></url>
<url><loc>https://scifaro.com/en/abs/deep-bayesian-unsupervised-source-separation-based-on-a-complex-gaussian-mixture-model-1908.11307</loc><lastmod>2019-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-bayesian-unsupervised-source-separation-based-on-a-complex-gaussian-mixture-model-1908.11307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-bayesian-unsupervised-source-separation-based-on-a-complex-gaussian-mixture-model-1908.11307"/></url>
<url><loc>https://scifaro.com/en/abs/voice-spoofing-detection-corpus-for-single-and-multi-order-audio-replays-1909.00935</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-spoofing-detection-corpus-for-single-and-multi-order-audio-replays-1909.00935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-spoofing-detection-corpus-for-single-and-multi-order-audio-replays-1909.00935"/></url>
<url><loc>https://scifaro.com/en/abs/on-loss-functions-for-supervised-monaural-time-domain-speech-enhancement-1909.01019</loc><lastmod>2020-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-loss-functions-for-supervised-monaural-time-domain-speech-enhancement-1909.01019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-loss-functions-for-supervised-monaural-time-domain-speech-enhancement-1909.01019"/></url>
<url><loc>https://scifaro.com/en/abs/demucs-deep-extractor-for-music-sources-with-extra-unlabeled-data-remixed-1909.01174</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/demucs-deep-extractor-for-music-sources-with-extra-unlabeled-data-remixed-1909.01174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/demucs-deep-extractor-for-music-sources-with-extra-unlabeled-data-remixed-1909.01174"/></url>
<url><loc>https://scifaro.com/en/abs/multiresolution-analysis-discrete-wavelet-transform-through-daubechies-family-for-emotion-recognition-in-speech-1909.01265</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiresolution-analysis-discrete-wavelet-transform-through-daubechies-family-for-emotion-recognition-in-speech-1909.01265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiresolution-analysis-discrete-wavelet-transform-through-daubechies-family-for-emotion-recognition-in-speech-1909.01265"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-and-perceptually-motivated-auditory-neural-encoding-and-decoding-algorithm-for-spiking-neural-networks-1909.01302</loc><lastmod>2019-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-and-perceptually-motivated-auditory-neural-encoding-and-decoding-algorithm-for-spiking-neural-networks-1909.01302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-and-perceptually-motivated-auditory-neural-encoding-and-decoding-algorithm-for-spiking-neural-networks-1909.01302"/></url>
<url><loc>https://scifaro.com/en/abs/towards-interpretable-polyphonic-transcription-with-invertible-neural-networks-1909.01622</loc><lastmod>2019-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-interpretable-polyphonic-transcription-with-invertible-neural-networks-1909.01622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-interpretable-polyphonic-transcription-with-invertible-neural-networks-1909.01622"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-modeling-of-phonetic-durations-1909.03030</loc><lastmod>2019-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-modeling-of-phonetic-durations-1909.03030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-modeling-of-phonetic-durations-1909.03030"/></url>
<url><loc>https://scifaro.com/en/abs/impulse-response-data-augmentation-and-deep-neural-networks-for-blind-room-acoustic-parameter-estimation-1909.03642</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impulse-response-data-augmentation-and-deep-neural-networks-for-blind-room-acoustic-parameter-estimation-1909.03642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impulse-response-data-augmentation-and-deep-neural-networks-for-blind-room-acoustic-parameter-estimation-1909.03642"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-and-interactive-tools-for-vocal-training-based-on-an-analytic-signal-with-a-cosine-series-envelope-1909.03650</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-and-interactive-tools-for-vocal-training-based-on-an-analytic-signal-with-a-cosine-series-envelope-1909.03650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-and-interactive-tools-for-vocal-training-based-on-an-analytic-signal-with-a-cosine-series-envelope-1909.03650"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-estuarine-dolphin-whistles-in-spectrogram-images-1909.04425</loc><lastmod>2019-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-estuarine-dolphin-whistles-in-spectrogram-images-1909.04425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-estuarine-dolphin-whistles-in-spectrogram-images-1909.04425"/></url>
<url><loc>https://scifaro.com/en/abs/computer-assisted-composition-in-continuous-time-1909.05030</loc><lastmod>2019-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computer-assisted-composition-in-continuous-time-1909.05030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computer-assisted-composition-in-continuous-time-1909.05030"/></url>
<url><loc>https://scifaro.com/en/abs/the-emotions-that-we-perceive-in-music-the-influence-of-language-and-lyrics-comprehension-on-agreement-1909.05882</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-emotions-that-we-perceive-in-music-the-influence-of-language-and-lyrics-comprehension-on-agreement-1909.05882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-emotions-that-we-perceive-in-music-the-influence-of-language-and-lyrics-comprehension-on-agreement-1909.05882"/></url>
<url><loc>https://scifaro.com/en/abs/musicnn-pre-trained-convolutional-neural-networks-for-music-audio-tagging-1909.06654</loc><lastmod>2019-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicnn-pre-trained-convolutional-neural-networks-for-music-audio-tagging-1909.06654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicnn-pre-trained-convolutional-neural-networks-for-music-audio-tagging-1909.06654"/></url>
<url><loc>https://scifaro.com/en/abs/a-scalable-noisy-speech-dataset-and-online-subjective-test-framework-1909.08050</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-scalable-noisy-speech-dataset-and-online-subjective-test-framework-1909.08050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-scalable-noisy-speech-dataset-and-online-subjective-test-framework-1909.08050"/></url>
<url><loc>https://scifaro.com/en/abs/musical-instrument-classification-via-low-dimensional-feature-vectors-1909.08444</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-instrument-classification-via-low-dimensional-feature-vectors-1909.08444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-instrument-classification-via-low-dimensional-feature-vectors-1909.08444"/></url>
<url><loc>https://scifaro.com/en/abs/cutting-music-source-separation-some-slakh-a-dataset-to-study-the-impact-of-training-data-quality-and-quantity-1909.08494</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cutting-music-source-separation-some-slakh-a-dataset-to-study-the-impact-of-training-data-quality-and-quantity-1909.08494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cutting-music-source-separation-some-slakh-a-dataset-to-study-the-impact-of-training-data-quality-and-quantity-1909.08494"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-impact-of-ground-sound-1909.09235</loc><lastmod>2019-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-impact-of-ground-sound-1909.09235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-impact-of-ground-sound-1909.09235"/></url>
<url><loc>https://scifaro.com/en/abs/mimii-dataset-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-1909.09347</loc><lastmod>2019-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimii-dataset-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-1909.09347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimii-dataset-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-1909.09347"/></url>
<url><loc>https://scifaro.com/en/abs/an-extended-two-dimensional-vocal-tract-model-for-fast-acoustic-simulation-of-single-axis-symmetric-three-dimensional-tubes-1909.09585</loc><lastmod>2019-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-extended-two-dimensional-vocal-tract-model-for-fast-acoustic-simulation-of-single-axis-symmetric-three-dimensional-tubes-1909.09585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-extended-two-dimensional-vocal-tract-model-for-fast-acoustic-simulation-of-single-axis-symmetric-three-dimensional-tubes-1909.09585"/></url>
<url><loc>https://scifaro.com/en/abs/cochleanet-a-robust-language-independent-audio-visual-model-for-speech-enhancement-1909.10407</loc><lastmod>2019-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cochleanet-a-robust-language-independent-audio-visual-model-for-speech-enhancement-1909.10407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cochleanet-a-robust-language-independent-audio-visual-model-for-speech-enhancement-1909.10407"/></url>
<url><loc>https://scifaro.com/en/abs/humangan-generative-adversarial-network-with-human-based-discriminator-and-its-evaluation-in-speech-perception-modeling-1909.11391</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/humangan-generative-adversarial-network-with-human-based-discriminator-and-its-evaluation-in-speech-perception-modeling-1909.11391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/humangan-generative-adversarial-network-with-human-based-discriminator-and-its-evaluation-in-speech-perception-modeling-1909.11391"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-adversarial-networks-1909.11646</loc><lastmod>2019-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-adversarial-networks-1909.11646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-adversarial-networks-1909.11646"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-speech-enhancement-by-raw-waveform-mapping-using-fully-convolutional-networks-1909.11909</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-speech-enhancement-by-raw-waveform-mapping-using-fully-convolutional-networks-1909.11909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-speech-enhancement-by-raw-waveform-mapping-using-fully-convolutional-networks-1909.11909"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-intelligibility-of-electric-and-acoustic-stimulation-speech-using-fully-convolutional-networks-based-speech-enhancement-1909.11912</loc><lastmod>2019-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-intelligibility-of-electric-and-acoustic-stimulation-speech-using-fully-convolutional-networks-based-speech-enhancement-1909.11912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-intelligibility-of-electric-and-acoustic-stimulation-speech-using-fully-convolutional-networks-based-speech-enhancement-1909.11912"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-joint-effect-on-denoising-techniques-and-visual-cues-to-improve-speech-intelligibility-in-cochlear-implant-simulation-1909.11919</loc><lastmod>2020-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-joint-effect-on-denoising-techniques-and-visual-cues-to-improve-speech-intelligibility-in-cochlear-implant-simulation-1909.11919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-joint-effect-on-denoising-techniques-and-visual-cues-to-improve-speech-intelligibility-in-cochlear-implant-simulation-1909.11919"/></url>
<url><loc>https://scifaro.com/en/abs/urban-sound-tagging-using-convolutional-neural-networks-1909.12699</loc><lastmod>2019-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/urban-sound-tagging-using-convolutional-neural-networks-1909.12699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/urban-sound-tagging-using-convolutional-neural-networks-1909.12699"/></url>
<url><loc>https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-stressful-talking-conditions-1909.13070</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-stressful-talking-conditions-1909.13070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emirati-accented-speaker-identification-in-stressful-talking-conditions-1909.13070"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-third-order-circular-suprasegmental-hidden-markov-model-1909.13244</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-third-order-circular-suprasegmental-hidden-markov-model-1909.13244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-in-emotional-talking-environments-based-on-third-order-circular-suprasegmental-hidden-markov-model-1909.13244"/></url>
<url><loc>https://scifaro.com/en/abs/av-speech-enhancement-challenge-using-a-real-noisy-corpus-1910.00424</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-speech-enhancement-challenge-using-a-real-noisy-corpus-1910.00424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-speech-enhancement-challenge-using-a-real-noisy-corpus-1910.00424"/></url>
<url><loc>https://scifaro.com/en/abs/latent-space-representation-for-multi-target-speaker-detection-and-identification-with-a-sparse-dataset-using-triplet-neural-networks-1910.01463</loc><lastmod>2019-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-space-representation-for-multi-target-speaker-detection-and-identification-with-a-sparse-dataset-using-triplet-neural-networks-1910.01463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-space-representation-for-multi-target-speaker-detection-and-identification-with-a-sparse-dataset-using-triplet-neural-networks-1910.01463"/></url>
<url><loc>https://scifaro.com/en/abs/midi-miner-a-python-library-for-tonal-tension-and-track-classification-1910.02049</loc><lastmod>2020-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-miner-a-python-library-for-tonal-tension-and-track-classification-1910.02049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-miner-a-python-library-for-tonal-tension-and-track-classification-1910.02049"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-the-comb-filter-effect-and-interaural-coherence-for-binaural-source-separation-1910.02127</loc><lastmod>2019-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-the-comb-filter-effect-and-interaural-coherence-for-binaural-source-separation-1910.02127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-the-comb-filter-effect-and-interaural-coherence-for-binaural-source-separation-1910.02127"/></url>
<url><loc>https://scifaro.com/en/abs/the-sounds-of-music-science-of-musical-scales-iii-indian-classical-1910.06375</loc><lastmod>2019-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sounds-of-music-science-of-musical-scales-iii-indian-classical-1910.06375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sounds-of-music-science-of-musical-scales-iii-indian-classical-1910.06375"/></url>
<url><loc>https://scifaro.com/en/abs/vfnet-a-convolutional-architecture-for-accent-classification-1910.06697</loc><lastmod>2019-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vfnet-a-convolutional-architecture-for-accent-classification-1910.06697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vfnet-a-convolutional-architecture-for-accent-classification-1910.06697"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-based-on-a-large-margin-factorized-cnn-1910.06784</loc><lastmod>2019-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-based-on-a-large-margin-factorized-cnn-1910.06784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-based-on-a-large-margin-factorized-cnn-1910.06784"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-labeled-sound-event-detection-using-tri-training-and-adversarial-learning-1910.06790</loc><lastmod>2019-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-labeled-sound-event-detection-using-tri-training-and-adversarial-learning-1910.06790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-labeled-sound-event-detection-using-tri-training-and-adversarial-learning-1910.06790"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-and-temporal-convolutional-attention-for-text-independent-speaker-recognition-1910.07364</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-and-temporal-convolutional-attention-for-text-independent-speaker-recognition-1910.07364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-and-temporal-convolutional-attention-for-text-independent-speaker-recognition-1910.07364"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-enhancement-based-on-discrete-cosine-transform-1910.07840</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-enhancement-based-on-discrete-cosine-transform-1910.07840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-enhancement-based-on-discrete-cosine-transform-1910.07840"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-parameter-estimation-of-an-asymmetric-vocal-fold-oscillation-model-and-its-application-in-discriminating-vocal-fold-pathologies-1910.08886</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-parameter-estimation-of-an-asymmetric-vocal-fold-oscillation-model-and-its-application-in-discriminating-vocal-fold-pathologies-1910.08886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-parameter-estimation-of-an-asymmetric-vocal-fold-oscillation-model-and-its-application-in-discriminating-vocal-fold-pathologies-1910.08886"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-for-discovering-phonemic-tone-contours-1910.08987</loc><lastmod>2020-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-for-discovering-phonemic-tone-contours-1910.08987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-for-discovering-phonemic-tone-contours-1910.08987"/></url>
<url><loc>https://scifaro.com/en/abs/musical-instrument-playing-technique-detection-based-on-fcn-using-chinese-bowed-stringed-instrument-as-an-example-1910.09021</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-instrument-playing-technique-detection-based-on-fcn-using-chinese-bowed-stringed-instrument-as-an-example-1910.09021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-instrument-playing-technique-detection-based-on-fcn-using-chinese-bowed-stringed-instrument-as-an-example-1910.09021"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-inpainting-of-time-frequency-masks-1910.09058</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-inpainting-of-time-frequency-masks-1910.09058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-inpainting-of-time-frequency-masks-1910.09058"/></url>
<url><loc>https://scifaro.com/en/abs/multi-band-multi-resolution-fully-convolutional-neural-networks-for-singing-voice-separation-1910.09266</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-band-multi-resolution-fully-convolutional-neural-networks-for-singing-voice-separation-1910.09266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-band-multi-resolution-fully-convolutional-neural-networks-for-singing-voice-separation-1910.09266"/></url>
<url><loc>https://scifaro.com/en/abs/clotho-an-audio-captioning-dataset-1910.09387</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clotho-an-audio-captioning-dataset-1910.09387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clotho-an-audio-captioning-dataset-1910.09387"/></url>
<url><loc>https://scifaro.com/en/abs/sound-texture-synthesis-using-ri-spectrograms-1910.09497</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-texture-synthesis-using-ri-spectrograms-1910.09497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-texture-synthesis-using-ri-spectrograms-1910.09497"/></url>
<url><loc>https://scifaro.com/en/abs/cross-task-pre-training-for-on-device-acoustic-scene-classification-1910.09935</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-task-pre-training-for-on-device-acoustic-scene-classification-1910.09935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-task-pre-training-for-on-device-acoustic-scene-classification-1910.09935"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-singing-synthesis-using-the-feed-forward-transformer-1910.09989</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-singing-synthesis-using-the-feed-forward-transformer-1910.09989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-singing-synthesis-using-the-feed-forward-transformer-1910.09989"/></url>
<url><loc>https://scifaro.com/en/abs/cross-representation-transferability-of-adversarial-attacks-from-spectrograms-to-audio-waveforms-1910.10106</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-representation-transferability-of-adversarial-attacks-from-spectrograms-to-audio-waveforms-1910.10106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-representation-transferability-of-adversarial-attacks-from-spectrograms-to-audio-waveforms-1910.10106"/></url>
<url><loc>https://scifaro.com/en/abs/learning-the-helix-topology-of-musical-pitch-1910.10246</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-the-helix-topology-of-musical-pitch-1910.10246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-the-helix-topology-of-musical-pitch-1910.10246"/></url>
<url><loc>https://scifaro.com/en/abs/whamr-noisy-and-reverberant-single-channel-speech-separation-1910.10279</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whamr-noisy-and-reverberant-single-channel-speech-separation-1910.10279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whamr-noisy-and-reverberant-single-channel-speech-separation-1910.10279"/></url>
<url><loc>https://scifaro.com/en/abs/filterbank-design-for-end-to-end-speech-separation-1910.10400</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filterbank-design-for-end-to-end-speech-separation-1910.10400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filterbank-design-for-end-to-end-speech-separation-1910.10400"/></url>
<url><loc>https://scifaro.com/en/abs/fast-independent-vector-extraction-by-iterative-sinr-maximization-1910.10654</loc><lastmod>2019-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-independent-vector-extraction-by-iterative-sinr-maximization-1910.10654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-independent-vector-extraction-by-iterative-sinr-maximization-1910.10654"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-multilateration-methods-for-single-source-localization-in-distributed-audio-1910.10661</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-multilateration-methods-for-single-source-localization-in-distributed-audio-1910.10661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-multilateration-methods-for-single-source-localization-in-distributed-audio-1910.10661"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-the-joint-optimization-of-perceptual-speech-metrics-1910.10707</loc><lastmod>2020-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-the-joint-optimization-of-perceptual-speech-metrics-1910.10707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-task-denoising-for-the-joint-optimization-of-perceptual-speech-metrics-1910.10707"/></url>
<url><loc>https://scifaro.com/en/abs/low-frequency-compensated-synthetic-impulse-responses-for-improved-far-field-speech-recognition-1910.10815</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-frequency-compensated-synthetic-impulse-responses-for-improved-far-field-speech-recognition-1910.10815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-frequency-compensated-synthetic-impulse-responses-for-improved-far-field-speech-recognition-1910.10815"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speech-separation-using-deep-embedding-model-with-multilayer-bootstrap-networks-1910.10912</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speech-separation-using-deep-embedding-model-with-multilayer-bootstrap-networks-1910.10912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speech-separation-using-deep-embedding-model-with-multilayer-bootstrap-networks-1910.10912"/></url>
<url><loc>https://scifaro.com/en/abs/syntonets-toward-a-harmony-inspired-general-model-of-complex-networks-1910.11047</loc><lastmod>2020-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syntonets-toward-a-harmony-inspired-general-model-of-complex-networks-1910.11047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syntonets-toward-a-harmony-inspired-general-model-of-complex-networks-1910.11047"/></url>
<url><loc>https://scifaro.com/en/abs/graph-representation-learning-for-audio-music-genre-classification-1910.11117</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-representation-learning-for-audio-music-genre-classification-1910.11117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-representation-learning-for-audio-music-genre-classification-1910.11117"/></url>
<url><loc>https://scifaro.com/en/abs/bootstrapping-deep-music-separation-from-primitive-auditory-grouping-principles-1910.11133</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bootstrapping-deep-music-separation-from-primitive-auditory-grouping-principles-1910.11133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bootstrapping-deep-music-separation-from-primitive-auditory-grouping-principles-1910.11133"/></url>
<url><loc>https://scifaro.com/en/abs/delving-into-voxceleb-environment-invariant-speaker-recognition-1910.11238</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delving-into-voxceleb-environment-invariant-speaker-recognition-1910.11238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delving-into-voxceleb-environment-invariant-speaker-recognition-1910.11238"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-in-deep-reinforcement-learning-for-automatic-speech-recognition-1910.11256</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-in-deep-reinforcement-learning-for-automatic-speech-recognition-1910.11256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-in-deep-reinforcement-learning-for-automatic-speech-recognition-1910.11256"/></url>
<url><loc>https://scifaro.com/en/abs/towards-fine-grained-prosody-control-for-voice-conversion-1910.11269</loc><lastmod>2020-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-fine-grained-prosody-control-for-voice-conversion-1910.11269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-fine-grained-prosody-control-for-voice-conversion-1910.11269"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-lexicon-free-modeling-units-for-end-to-end-korean-and-korean-english-code-switching-speech-recognition-1910.11590</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-lexicon-free-modeling-units-for-end-to-end-korean-and-korean-english-code-switching-speech-recognition-1910.11590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-lexicon-free-modeling-units-for-end-to-end-korean-and-korean-english-code-switching-speech-recognition-1910.11590"/></url>
<url><loc>https://scifaro.com/en/abs/channel-adversarial-training-for-speaker-verification-and-diarization-1910.11643</loc><lastmod>2020-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-adversarial-training-for-speaker-verification-and-diarization-1910.11643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-adversarial-training-for-speaker-verification-and-diarization-1910.11643"/></url>
<url><loc>https://scifaro.com/en/abs/secost-sequential-co-supervision-for-large-scale-weakly-labeled-audio-event-detection-1910.11789</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/secost-sequential-co-supervision-for-large-scale-weakly-labeled-audio-event-detection-1910.11789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/secost-sequential-co-supervision-for-large-scale-weakly-labeled-audio-event-detection-1910.11789"/></url>
<url><loc>https://scifaro.com/en/abs/mellotron-multispeaker-expressive-voice-synthesis-by-conditioning-on-rhythm-pitch-and-global-style-tokens-1910.11997</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mellotron-multispeaker-expressive-voice-synthesis-by-conditioning-on-rhythm-pitch-and-global-style-tokens-1910.11997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mellotron-multispeaker-expressive-voice-synthesis-by-conditioning-on-rhythm-pitch-and-global-style-tokens-1910.11997"/></url>
<url><loc>https://scifaro.com/en/abs/model-agnostic-approaches-to-handling-noisy-labels-when-training-sound-event-classifiers-1910.12004</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-agnostic-approaches-to-handling-noisy-labels-when-training-sound-event-classifiers-1910.12004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-agnostic-approaches-to-handling-noisy-labels-when-training-sound-event-classifiers-1910.12004"/></url>
<url><loc>https://scifaro.com/en/abs/a-holistic-approach-to-polyphonic-music-transcription-with-neural-networks-1910.12086</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-holistic-approach-to-polyphonic-music-transcription-with-neural-networks-1910.12086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-holistic-approach-to-polyphonic-music-transcription-with-neural-networks-1910.12086"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-for-end-to-end-low-resource-speech-recognition-1910.12094</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-for-end-to-end-low-resource-speech-recognition-1910.12094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-for-end-to-end-low-resource-speech-recognition-1910.12094"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-recognition-in-a-smart-city-surveillance-context-1910.12369</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-recognition-in-a-smart-city-surveillance-context-1910.12369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-recognition-in-a-smart-city-surveillance-context-1910.12369"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-pre-training-for-sequence-to-sequence-speech-recognition-1910.12418</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-pre-training-for-sequence-to-sequence-speech-recognition-1910.12418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-pre-training-for-sequence-to-sequence-speech-recognition-1910.12418"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-and-scalable-version-identification-using-musically-motivated-embeddings-1910.12551</loc><lastmod>2020-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-and-scalable-version-identification-using-musically-motivated-embeddings-1910.12551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-and-scalable-version-identification-using-musically-motivated-embeddings-1910.12551"/></url>
<url><loc>https://scifaro.com/en/abs/interrupted-and-cascaded-permutation-invariant-training-for-speech-separation-1910.12706</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interrupted-and-cascaded-permutation-invariant-training-for-speech-separation-1910.12706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interrupted-and-cascaded-permutation-invariant-training-for-speech-separation-1910.12706"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-timbre-and-singing-style-with-multi-singer-singing-synthesis-system-1910.13069</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-timbre-and-singing-style-with-multi-singer-singing-synthesis-system-1910.13069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-timbre-and-singing-style-with-multi-singer-singing-synthesis-system-1910.13069"/></url>
<url><loc>https://scifaro.com/en/abs/on-investigation-of-unsupervised-speech-factorization-based-on-normalization-flow-1910.13288</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-investigation-of-unsupervised-speech-factorization-based-on-normalization-flow-1910.13288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-investigation-of-unsupervised-speech-factorization-based-on-normalization-flow-1910.13288"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-optimal-dereverberation-and-beamforming-1910.13707</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-optimal-dereverberation-and-beamforming-1910.13707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-optimal-dereverberation-and-beamforming-1910.13707"/></url>
<url><loc>https://scifaro.com/en/abs/sms-wsj-database-performance-measures-and-baseline-recipe-for-multi-channel-source-separation-and-recognition-1910.13934</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sms-wsj-database-performance-measures-and-baseline-recipe-for-multi-channel-source-separation-and-recognition-1910.13934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sms-wsj-database-performance-measures-and-baseline-recipe-for-multi-channel-source-separation-and-recognition-1910.13934"/></url>
<url><loc>https://scifaro.com/en/abs/w-net-bf-dnn-based-beamformer-using-joint-training-approach-1910.14262</loc><lastmod>2020-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/w-net-bf-dnn-based-beamformer-using-joint-training-approach-1910.14262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/w-net-bf-dnn-based-beamformer-using-joint-training-approach-1910.14262"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-non-negative-autoencoders-for-sound-source-separation-1911.00102</loc><lastmod>2019-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-non-negative-autoencoders-for-sound-source-separation-1911.00102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-non-negative-autoencoders-for-sound-source-separation-1911.00102"/></url>
<url><loc>https://scifaro.com/en/abs/long-distance-detection-of-bioacoustic-events-with-per-channel-energy-normalization-1911.00417</loc><lastmod>2019-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-distance-detection-of-bioacoustic-events-with-per-channel-energy-normalization-1911.00417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-distance-detection-of-bioacoustic-events-with-per-channel-energy-normalization-1911.00417"/></url>
<url><loc>https://scifaro.com/en/abs/otomechanic-auditory-automobile-diagnostics-via-query-by-example-1911.02073</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/otomechanic-auditory-automobile-diagnostics-via-query-by-example-1911.02073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/otomechanic-auditory-automobile-diagnostics-via-query-by-example-1911.02073"/></url>
<url><loc>https://scifaro.com/en/abs/finding-strength-in-weakness-learning-to-separate-sounds-with-weak-supervision-1911.02182</loc><lastmod>2020-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/finding-strength-in-weakness-learning-to-separate-sounds-with-weak-supervision-1911.02182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/finding-strength-in-weakness-learning-to-separate-sounds-with-weak-supervision-1911.02182"/></url>
<url><loc>https://scifaro.com/en/abs/the-sound-of-my-voice-speaker-representation-loss-for-target-voice-separation-1911.02411</loc><lastmod>2020-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sound-of-my-voice-speaker-representation-loss-for-target-voice-separation-1911.02411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sound-of-my-voice-speaker-representation-loss-for-target-voice-separation-1911.02411"/></url>
<url><loc>https://scifaro.com/en/abs/online-spectrogram-inversion-for-low-latency-audio-source-separation-1911.03128</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-spectrogram-inversion-for-low-latency-audio-source-separation-1911.03128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-spectrogram-inversion-for-low-latency-audio-source-separation-1911.03128"/></url>
<url><loc>https://scifaro.com/en/abs/transformation-of-low-quality-device-recorded-speech-to-high-quality-speech-using-improved-segan-model-1911.03952</loc><lastmod>2019-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformation-of-low-quality-device-recorded-speech-to-high-quality-speech-using-improved-segan-model-1911.03952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformation-of-low-quality-device-recorded-speech-to-high-quality-speech-using-improved-segan-model-1911.03952"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-in-presence-of-background-noise-using-eeg-1911.04261</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-in-presence-of-background-noise-using-eeg-1911.04261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-in-presence-of-background-noise-using-eeg-1911.04261"/></url>
<url><loc>https://scifaro.com/en/abs/visualizing-and-understanding-self-attention-based-music-tagging-1911.04385</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizing-and-understanding-self-attention-based-music-tagging-1911.04385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizing-and-understanding-self-attention-based-music-tagging-1911.04385"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-initialization-of-lstm-networks-for-fundamental-frequency-detection-in-noisy-speech-signals-1911.04580</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-initialization-of-lstm-networks-for-fundamental-frequency-detection-in-noisy-speech-signals-1911.04580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-initialization-of-lstm-networks-for-fundamental-frequency-detection-in-noisy-speech-signals-1911.04580"/></url>
<url><loc>https://scifaro.com/en/abs/random-projections-of-mel-spectrograms-as-low-level-features-for-automatic-music-genre-classification-1911.04660</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/random-projections-of-mel-spectrograms-as-low-level-features-for-automatic-music-genre-classification-1911.04660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/random-projections-of-mel-spectrograms-as-low-level-features-for-automatic-music-genre-classification-1911.04660"/></url>
<url><loc>https://scifaro.com/en/abs/phasen-a-phase-and-harmonics-aware-speech-enhancement-network-1911.04697</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phasen-a-phase-and-harmonics-aware-speech-enhancement-network-1911.04697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phasen-a-phase-and-harmonics-aware-speech-enhancement-network-1911.04697"/></url>
<url><loc>https://scifaro.com/en/abs/using-musical-relationships-between-chord-labels-in-automatic-chord-extraction-tasks-1911.04973</loc><lastmod>2019-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-musical-relationships-between-chord-labels-in-automatic-chord-extraction-tasks-1911.04973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-musical-relationships-between-chord-labels-in-automatic-chord-extraction-tasks-1911.04973"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-and-theme-recognition-in-music-with-frequency-aware-rf-regularized-cnns-1911.05833</loc><lastmod>2019-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-and-theme-recognition-in-music-with-frequency-aware-rf-regularized-cnns-1911.05833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-and-theme-recognition-in-music-with-frequency-aware-rf-regularized-cnns-1911.05833"/></url>
<url><loc>https://scifaro.com/en/abs/coincidence-categorization-and-consolidation-learning-to-recognize-sounds-with-minimal-supervision-1911.05894</loc><lastmod>2019-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coincidence-categorization-and-consolidation-learning-to-recognize-sounds-with-minimal-supervision-1911.05894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coincidence-categorization-and-consolidation-learning-to-recognize-sounds-with-minimal-supervision-1911.05894"/></url>
<url><loc>https://scifaro.com/en/abs/scene-aware-audio-rendering-via-deep-acoustic-analysis-1911.06245</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scene-aware-audio-rendering-via-deep-acoustic-analysis-1911.06245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scene-aware-audio-rendering-via-deep-acoustic-analysis-1911.06245"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independence-of-neural-vocoders-and-their-effect-on-parametric-resynthesis-speech-enhancement-1911.06266</loc><lastmod>2019-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independence-of-neural-vocoders-and-their-effect-on-parametric-resynthesis-speech-enhancement-1911.06266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independence-of-neural-vocoders-and-their-effect-on-parametric-resynthesis-speech-enhancement-1911.06266"/></url>
<url><loc>https://scifaro.com/en/abs/deep-long-audio-inpainting-1911.06476</loc><lastmod>2019-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-long-audio-inpainting-1911.06476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-long-audio-inpainting-1911.06476"/></url>
<url><loc>https://scifaro.com/en/abs/sample-drop-detection-for-distant-speech-recognition-with-asynchronous-devices-distributed-in-space-1911.06713</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-drop-detection-for-distant-speech-recognition-with-asynchronous-devices-distributed-in-space-1911.06713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-drop-detection-for-distant-speech-recognition-with-asynchronous-devices-distributed-in-space-1911.06713"/></url>
<url><loc>https://scifaro.com/en/abs/music-theme-recognition-using-cnn-and-self-attention-1911.07041</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-theme-recognition-using-cnn-and-self-attention-1911.07041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-theme-recognition-using-cnn-and-self-attention-1911.07041"/></url>
<url><loc>https://scifaro.com/en/abs/n-hans-introducing-the-augsburg-neuro-holistic-audio-enhancement-system-1911.07062</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/n-hans-introducing-the-augsburg-neuro-holistic-audio-enhancement-system-1911.07062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/n-hans-introducing-the-augsburg-neuro-holistic-audio-enhancement-system-1911.07062"/></url>
<url><loc>https://scifaro.com/en/abs/voice-a-sound-event-detection-dataset-for-generalizable-domain-adaptation-1911.07098</loc><lastmod>2019-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-a-sound-event-detection-dataset-for-generalizable-domain-adaptation-1911.07098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-a-sound-event-detection-dataset-for-generalizable-domain-adaptation-1911.07098"/></url>
<url><loc>https://scifaro.com/en/abs/a-spatial-sampling-approach-to-wave-field-synthesis-pbap-and-huygens-arrays-1911.07575</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-spatial-sampling-approach-to-wave-field-synthesis-pbap-and-huygens-arrays-1911.07575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-spatial-sampling-approach-to-wave-field-synthesis-pbap-and-huygens-arrays-1911.07575"/></url>
<url><loc>https://scifaro.com/en/abs/improving-universal-sound-separation-using-sound-classification-1911.07951</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-universal-sound-separation-using-sound-classification-1911.07951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-universal-sound-separation-using-sound-classification-1911.07951"/></url>
<url><loc>https://scifaro.com/en/abs/sequential-multi-frame-neural-beamforming-for-speech-separation-and-enhancement-1911.07953</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequential-multi-frame-neural-beamforming-for-speech-separation-and-enhancement-1911.07953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequential-multi-frame-neural-beamforming-for-speech-separation-and-enhancement-1911.07953"/></url>
<url><loc>https://scifaro.com/en/abs/demystifying-tasnet-a-dissecting-approach-1911.08895</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/demystifying-tasnet-a-dissecting-approach-1911.08895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/demystifying-tasnet-a-dissecting-approach-1911.08895"/></url>
<url><loc>https://scifaro.com/en/abs/joint-nn-supported-multichannel-reduction-of-acoustic-echo-reverberation-and-noise-1911.08934</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-nn-supported-multichannel-reduction-of-acoustic-echo-reverberation-and-noise-1911.08934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-nn-supported-multichannel-reduction-of-acoustic-echo-reverberation-and-noise-1911.08934"/></url>
<url><loc>https://scifaro.com/en/abs/moving-to-communicate-moving-to-interact-patterns-of-body-motion-in-musical-duo-performance-1911.09018</loc><lastmod>2019-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moving-to-communicate-moving-to-interact-patterns-of-body-motion-in-musical-duo-performance-1911.09018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moving-to-communicate-moving-to-interact-patterns-of-body-motion-in-musical-duo-performance-1911.09018"/></url>
<url><loc>https://scifaro.com/en/abs/designing-virtual-soundscapes-for-alzheimer-s-disease-care-1911.09459</loc><lastmod>2019-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/designing-virtual-soundscapes-for-alzheimer-s-disease-care-1911.09459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/designing-virtual-soundscapes-for-alzheimer-s-disease-care-1911.09459"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-transfer-in-neural-text-to-speech-using-global-pitch-and-loudness-features-1911.09645</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-transfer-in-neural-text-to-speech-using-global-pitch-and-loudness-features-1911.09645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-transfer-in-neural-text-to-speech-using-global-pitch-and-loudness-features-1911.09645"/></url>
<url><loc>https://scifaro.com/en/abs/gankyoku-a-generative-adversarial-network-for-shakuhachi-music-1911.10119</loc><lastmod>2019-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gankyoku-a-generative-adversarial-network-for-shakuhachi-music-1911.10119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gankyoku-a-generative-adversarial-network-for-shakuhachi-music-1911.10119"/></url>
<url><loc>https://scifaro.com/en/abs/narrow-band-deep-filtering-for-multichannel-speech-enhancement-1911.10791</loc><lastmod>2020-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/narrow-band-deep-filtering-for-multichannel-speech-enhancement-1911.10791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/narrow-band-deep-filtering-for-multichannel-speech-enhancement-1911.10791"/></url>
<url><loc>https://scifaro.com/en/abs/improving-polyphonic-music-models-with-feature-rich-encoding-1911.11775</loc><lastmod>2021-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-polyphonic-music-models-with-feature-rich-encoding-1911.11775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-polyphonic-music-models-with-feature-rich-encoding-1911.11775"/></url>
<url><loc>https://scifaro.com/en/abs/schr-odingernn-generative-modeling-of-raw-audio-as-a-continuously-observed-quantum-state-1911.11879</loc><lastmod>2019-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/schr-odingernn-generative-modeling-of-raw-audio-as-a-continuously-observed-quantum-state-1911.11879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/schr-odingernn-generative-modeling-of-raw-audio-as-a-continuously-observed-quantum-state-1911.11879"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-for-music-genre-multifaceted-review-and-experimentation-with-audioset-1911.12618</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-for-music-genre-multifaceted-review-and-experimentation-with-audioset-1911.12618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-for-music-genre-multifaceted-review-and-experimentation-with-audioset-1911.12618"/></url>
<url><loc>https://scifaro.com/en/abs/j-net-randomly-weighted-u-net-for-audio-source-separation-1911.12926</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/j-net-randomly-weighted-u-net-for-audio-source-separation-1911.12926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/j-net-randomly-weighted-u-net-for-audio-source-separation-1911.12926"/></url>
<url><loc>https://scifaro.com/en/abs/improving-voice-separation-by-incorporating-end-to-end-speech-recognition-1911.12928</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-voice-separation-by-incorporating-end-to-end-speech-recognition-1911.12928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-voice-separation-by-incorporating-end-to-end-speech-recognition-1911.12928"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-in-the-waveform-domain-1911.13254</loc><lastmod>2021-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-in-the-waveform-domain-1911.13254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-in-the-waveform-domain-1911.13254"/></url>
<url><loc>https://scifaro.com/en/abs/three-orthogonal-dimensions-for-psychoacoustic-sonification-1912.00766</loc><lastmod>2020-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/three-orthogonal-dimensions-for-psychoacoustic-sonification-1912.00766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/three-orthogonal-dimensions-for-psychoacoustic-sonification-1912.00766"/></url>
<url><loc>https://scifaro.com/en/abs/waveflow-a-compact-flow-based-model-for-raw-audio-1912.01219</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveflow-a-compact-flow-based-model-for-raw-audio-1912.01219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveflow-a-compact-flow-based-model-for-raw-audio-1912.01219"/></url>
<url><loc>https://scifaro.com/en/abs/hi-mia-a-far-field-text-dependent-speaker-verification-database-and-the-baselines-1912.01231</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hi-mia-a-far-field-text-dependent-speaker-verification-database-and-the-baselines-1912.01231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hi-mia-a-far-field-text-dependent-speaker-verification-database-and-the-baselines-1912.01231"/></url>
<url><loc>https://scifaro.com/en/abs/pitchnet-unsupervised-singing-voice-conversion-with-pitch-adversarial-network-1912.01852</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitchnet-unsupervised-singing-voice-conversion-with-pitch-adversarial-network-1912.01852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitchnet-unsupervised-singing-voice-conversion-with-pitch-adversarial-network-1912.01852"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-neural-vocoding-for-speech-generation-a-survey-1912.02461</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-neural-vocoding-for-speech-generation-a-survey-1912.02461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-neural-vocoding-for-speech-generation-a-survey-1912.02461"/></url>
<url><loc>https://scifaro.com/en/abs/voxsrc-2019-the-first-voxceleb-speaker-recognition-challenge-1912.02522</loc><lastmod>2019-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxsrc-2019-the-first-voxceleb-speaker-recognition-challenge-1912.02522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxsrc-2019-the-first-voxceleb-speaker-recognition-challenge-1912.02522"/></url>
<url><loc>https://scifaro.com/en/abs/a-supervised-speech-enhancement-approach-with-residual-noise-control-for-voice-communication-1912.03679</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-supervised-speech-enhancement-approach-with-residual-noise-control-for-voice-communication-1912.03679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-supervised-speech-enhancement-approach-with-residual-noise-control-for-voice-communication-1912.03679"/></url>
<url><loc>https://scifaro.com/en/abs/mitas-a-compressed-time-domain-audio-separation-network-with-parameter-sharing-1912.03884</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mitas-a-compressed-time-domain-audio-separation-network-with-parameter-sharing-1912.03884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mitas-a-compressed-time-domain-audio-separation-network-with-parameter-sharing-1912.03884"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-of-weakly-labelled-data-with-cnn-transformer-and-automatic-threshold-optimization-1912.04761</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-of-weakly-labelled-data-with-cnn-transformer-and-automatic-threshold-optimization-1912.04761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-of-weakly-labelled-data-with-cnn-transformer-and-automatic-threshold-optimization-1912.04761"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-graph-convolutional-network-1912.05124</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-graph-convolutional-network-1912.05124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-graph-convolutional-network-1912.05124"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-for-whispered-speech-synthesis-1912.05289</loc><lastmod>2020-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-for-whispered-speech-synthesis-1912.05289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-for-whispered-speech-synthesis-1912.05289"/></url>
<url><loc>https://scifaro.com/en/abs/encoding-musical-style-with-transformer-autoencoders-1912.05537</loc><lastmod>2020-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/encoding-musical-style-with-transformer-autoencoders-1912.05537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/encoding-musical-style-with-transformer-autoencoders-1912.05537"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-model-aspects-of-hearing-perception-using-neural-loss-functions-1912.05683</loc><lastmod>2019-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-model-aspects-of-hearing-perception-using-neural-loss-functions-1912.05683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-model-aspects-of-hearing-perception-using-neural-loss-functions-1912.05683"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-classification-with-parallel-temporal-spectral-attention-1912.06808</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-classification-with-parallel-temporal-spectral-attention-1912.06808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-classification-with-parallel-temporal-spectral-attention-1912.06808"/></url>
<url><loc>https://scifaro.com/en/abs/scattering-in-feedback-delay-networks-1912.08888</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scattering-in-feedback-delay-networks-1912.08888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scattering-in-feedback-delay-networks-1912.08888"/></url>
<url><loc>https://scifaro.com/en/abs/learning-singing-from-speech-1912.10128</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-singing-from-speech-1912.10128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-singing-from-speech-1912.10128"/></url>
<url><loc>https://scifaro.com/en/abs/panns-large-scale-pretrained-audio-neural-networks-for-audio-pattern-recognition-1912.10211</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/panns-large-scale-pretrained-audio-neural-networks-for-audio-pattern-recognition-1912.10211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/panns-large-scale-pretrained-audio-neural-networks-for-audio-pattern-recognition-1912.10211"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-prior-1912.10292</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-prior-1912.10292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-prior-1912.10292"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-from-speech-1912.10458</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-1912.10458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-1912.10458"/></url>
<url><loc>https://scifaro.com/en/abs/wykorzystanie-sztucznej-inteligencji-do-generowania-tre-sci-muzycznych-1912.10815</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wykorzystanie-sztucznej-inteligencji-do-generowania-tre-sci-muzycznych-1912.10815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wykorzystanie-sztucznej-inteligencji-do-generowania-tre-sci-muzycznych-1912.10815"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-automatic-mating-success-prediction-of-giant-pandas-1912.11333</loc><lastmod>2026-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-automatic-mating-success-prediction-of-giant-pandas-1912.11333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-automatic-mating-success-prediction-of-giant-pandas-1912.11333"/></url>
<url><loc>https://scifaro.com/en/abs/thuee-system-description-for-nist-2019-sre-cts-challenge-1912.11585</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thuee-system-description-for-nist-2019-sre-cts-challenge-1912.11585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thuee-system-description-for-nist-2019-sre-cts-challenge-1912.11585"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-permutation-invariant-training-with-latency-controlled-blstm-for-single-channel-multi-talker-speech-separation-1912.11613</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-permutation-invariant-training-with-latency-controlled-blstm-for-single-channel-multi-talker-speech-separation-1912.11613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-permutation-invariant-training-with-latency-controlled-blstm-for-single-channel-multi-talker-speech-separation-1912.11613"/></url>
<url><loc>https://scifaro.com/en/abs/score-and-lyrics-free-singing-voice-generation-1912.11747</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-and-lyrics-free-singing-voice-generation-1912.11747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-and-lyrics-free-singing-voice-generation-1912.11747"/></url>
<url><loc>https://scifaro.com/en/abs/moevc-a-mixture-of-experts-voice-conversion-system-with-sparse-gating-mechanism-for-accelerating-online-computation-1912.11984</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moevc-a-mixture-of-experts-voice-conversion-system-with-sparse-gating-mechanism-for-accelerating-online-computation-1912.11984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moevc-a-mixture-of-experts-voice-conversion-system-with-sparse-gating-mechanism-for-accelerating-online-computation-1912.11984"/></url>
<url><loc>https://scifaro.com/en/abs/cross-scale-attention-model-for-acoustic-event-classification-1912.12011</loc><lastmod>2020-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-scale-attention-model-for-acoustic-event-classification-1912.12011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-scale-attention-model-for-acoustic-event-classification-1912.12011"/></url>
<url><loc>https://scifaro.com/en/abs/nnaudio-an-on-the-fly-gpu-audio-to-spectrogram-conversion-toolbox-using-1d-convolution-neural-networks-1912.12055</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nnaudio-an-on-the-fly-gpu-audio-to-spectrogram-conversion-toolbox-using-1d-convolution-neural-networks-1912.12055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nnaudio-an-on-the-fly-gpu-audio-to-spectrogram-conversion-toolbox-using-1d-convolution-neural-networks-1912.12055"/></url>
<url><loc>https://scifaro.com/en/abs/complex-cepstrum-based-decomposition-of-speech-for-glottal-source-estimation-1912.12602</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-cepstrum-based-decomposition-of-speech-for-glottal-source-estimation-1912.12602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-cepstrum-based-decomposition-of-speech-for-glottal-source-estimation-1912.12602"/></url>
<url><loc>https://scifaro.com/en/abs/glottal-source-processing-from-analysis-to-applications-1912.12604</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glottal-source-processing-from-analysis-to-applications-1912.12604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glottal-source-processing-from-analysis-to-applications-1912.12604"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-pitch-extraction-algorithms-on-a-large-variety-of-singing-sounds-1912.12609</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-pitch-extraction-algorithms-on-a-large-variety-of-singing-sounds-1912.12609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-pitch-extraction-algorithms-on-a-large-variety-of-singing-sounds-1912.12609"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-on-acoustic-scene-classification-1912.12825</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-on-acoustic-scene-classification-1912.12825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-on-acoustic-scene-classification-1912.12825"/></url>
<url><loc>https://scifaro.com/en/abs/causal-anticausal-decomposition-of-speech-using-complex-cepstrum-for-glottal-source-estimation-1912.12843</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-anticausal-decomposition-of-speech-using-complex-cepstrum-for-glottal-source-estimation-1912.12843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-anticausal-decomposition-of-speech-using-complex-cepstrum-for-glottal-source-estimation-1912.12843"/></url>
<url><loc>https://scifaro.com/en/abs/using-a-pitch-synchronous-residual-codebook-for-hybrid-hmm-frame-selection-speech-synthesis-1912.12887</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-a-pitch-synchronous-residual-codebook-for-hybrid-hmm-frame-selection-speech-synthesis-1912.12887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-a-pitch-synchronous-residual-codebook-for-hybrid-hmm-frame-selection-speech-synthesis-1912.12887"/></url>
<url><loc>https://scifaro.com/en/abs/phase-based-information-for-voice-pathology-detection-2001.00372</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-based-information-for-voice-pathology-detection-2001.00372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-based-information-for-voice-pathology-detection-2001.00372"/></url>
<url><loc>https://scifaro.com/en/abs/deep-representation-learning-in-speech-processing-challenges-recent-advances-and-future-trends-2001.00378</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-representation-learning-in-speech-processing-challenges-recent-advances-and-future-trends-2001.00378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-representation-learning-in-speech-processing-challenges-recent-advances-and-future-trends-2001.00378"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-spatial-neural-filter-direction-informed-end-to-end-multi-channel-target-speech-separation-2001.00391</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-spatial-neural-filter-direction-informed-end-to-end-multi-channel-target-speech-separation-2001.00391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-spatial-neural-filter-direction-informed-end-to-end-multi-channel-target-speech-separation-2001.00391"/></url>
<url><loc>https://scifaro.com/en/abs/joint-robust-voicing-detection-and-pitch-estimation-based-on-residual-harmonics-2001.00459</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-robust-voicing-detection-and-pitch-estimation-based-on-residual-harmonics-2001.00459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-robust-voicing-detection-and-pitch-estimation-based-on-residual-harmonics-2001.00459"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-speech-signals-a-quantitative-review-2001.00473</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-speech-signals-a-quantitative-review-2001.00473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-glottal-closure-instants-from-speech-signals-a-quantitative-review-2001.00473"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-evaluation-of-pitch-modification-techniques-2001.00579</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-evaluation-of-pitch-modification-techniques-2001.00579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-evaluation-of-pitch-modification-techniques-2001.00579"/></url>
<url><loc>https://scifaro.com/en/abs/assessment-of-audio-features-for-automatic-cough-detection-2001.00580</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessment-of-audio-features-for-automatic-cough-detection-2001.00580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessment-of-audio-features-for-automatic-cough-detection-2001.00580"/></url>
<url><loc>https://scifaro.com/en/abs/eigenresiduals-for-improved-parametric-speech-synthesis-2001.00581</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eigenresiduals-for-improved-parametric-speech-synthesis-2001.00581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eigenresiduals-for-improved-parametric-speech-synthesis-2001.00581"/></url>
<url><loc>https://scifaro.com/en/abs/excitation-based-voice-quality-analysis-and-modification-2001.00582</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/excitation-based-voice-quality-analysis-and-modification-2001.00582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/excitation-based-voice-quality-analysis-and-modification-2001.00582"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-mutual-information-between-source-and-filter-contributions-for-voice-pathology-detection-2001.00583</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-mutual-information-between-source-and-filter-contributions-for-voice-pathology-detection-2001.00583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-mutual-information-between-source-and-filter-contributions-for-voice-pathology-detection-2001.00583"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-glottal-source-estimation-techniques-2001.00840</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-glottal-source-estimation-techniques-2001.00840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-glottal-source-estimation-techniques-2001.00840"/></url>
<url><loc>https://scifaro.com/en/abs/glottal-closure-and-opening-instant-detection-from-speech-signals-2001.00841</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glottal-closure-and-opening-instant-detection-from-speech-signals-2001.00841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glottal-closure-and-opening-instant-detection-from-speech-signals-2001.00841"/></url>
<url><loc>https://scifaro.com/en/abs/a-deterministic-plus-stochastic-model-of-the-residual-signal-for-improved-parametric-speech-synthesis-2001.00842</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deterministic-plus-stochastic-model-of-the-residual-signal-for-improved-parametric-speech-synthesis-2001.00842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deterministic-plus-stochastic-model-of-the-residual-signal-for-improved-parametric-speech-synthesis-2001.00842"/></url>
<url><loc>https://scifaro.com/en/abs/the-deterministic-plus-stochastic-model-of-the-residual-signal-and-its-applications-2001.01000</loc><lastmod>2020-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-deterministic-plus-stochastic-model-of-the-residual-signal-and-its-applications-2001.01000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-deterministic-plus-stochastic-model-of-the-residual-signal-and-its-applications-2001.01000"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-musical-structure-with-artificial-neural-networks-2001.01720</loc><lastmod>2020-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-musical-structure-with-artificial-neural-networks-2001.01720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-musical-structure-with-artificial-neural-networks-2001.01720"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-melody-harmonization-with-triad-chords-a-comparative-study-2001.02360</loc><lastmod>2021-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-melody-harmonization-with-triad-chords-a-comparative-study-2001.02360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-melody-harmonization-with-triad-chords-a-comparative-study-2001.02360"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-automatic-speech-recognition-with-the-transformer-model-2001.02674</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-automatic-speech-recognition-with-the-transformer-model-2001.02674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-automatic-speech-recognition-with-the-transformer-model-2001.02674"/></url>
<url><loc>https://scifaro.com/en/abs/cure-dataset-ladder-networks-for-audio-event-classification-2001.03896</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cure-dataset-ladder-networks-for-audio-event-classification-2001.03896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cure-dataset-ladder-networks-for-audio-event-classification-2001.03896"/></url>
<url><loc>https://scifaro.com/en/abs/learning-style-aware-symbolic-music-representations-by-adversarial-autoencoders-2001.05494</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-style-aware-symbolic-music-representations-by-adversarial-autoencoders-2001.05494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-style-aware-symbolic-music-representations-by-adversarial-autoencoders-2001.05494"/></url>
<url><loc>https://scifaro.com/en/abs/squeezewave-extremely-lightweight-vocoders-for-on-device-speech-synthesis-2001.05685</loc><lastmod>2020-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/squeezewave-extremely-lightweight-vocoders-for-on-device-speech-synthesis-2001.05685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/squeezewave-extremely-lightweight-vocoders-for-on-device-speech-synthesis-2001.05685"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-speaker-embedding-de-mixing-in-two-speaker-environment-2001.06397</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-speaker-embedding-de-mixing-in-two-speaker-environment-2001.06397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-speaker-embedding-de-mixing-in-two-speaker-environment-2001.06397"/></url>
<url><loc>https://scifaro.com/en/abs/jvs-music-japanese-multispeaker-singing-voice-corpus-2001.07044</loc><lastmod>2020-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jvs-music-japanese-multispeaker-singing-voice-corpus-2001.07044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jvs-music-japanese-multispeaker-singing-voice-corpus-2001.07044"/></url>
<url><loc>https://scifaro.com/en/abs/non-negative-matrix-factorization-convolutional-neural-network-nmf-cnn-for-sound-event-detection-2001.07874</loc><lastmod>2020-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-convolutional-neural-network-nmf-cnn-for-sound-event-detection-2001.07874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-negative-matrix-factorization-convolutional-neural-network-nmf-cnn-for-sound-event-detection-2001.07874"/></url>
<url><loc>https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-speech-quality-and-testing-framework-2001.08662</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-speech-quality-and-testing-framework-2001.08662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-speech-quality-and-testing-framework-2001.08662"/></url>
<url><loc>https://scifaro.com/en/abs/scattering-features-for-multimodal-gait-recognition-2001.08830</loc><lastmod>2020-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scattering-features-for-multimodal-gait-recognition-2001.08830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scattering-features-for-multimodal-gait-recognition-2001.08830"/></url>
<url><loc>https://scifaro.com/en/abs/learning-multi-instrument-classification-with-partial-labels-2001.08864</loc><lastmod>2020-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-multi-instrument-classification-with-partial-labels-2001.08864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-multi-instrument-classification-with-partial-labels-2001.08864"/></url>
<url><loc>https://scifaro.com/en/abs/lafurca-iterative-refined-speech-separation-based-on-context-aware-dual-path-parallel-bi-lstm-2001.08998</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lafurca-iterative-refined-speech-separation-based-on-context-aware-dual-path-parallel-bi-lstm-2001.08998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lafurca-iterative-refined-speech-separation-based-on-context-aware-dual-path-parallel-bi-lstm-2001.08998"/></url>
<url><loc>https://scifaro.com/en/abs/regression-based-music-emotion-prediction-using-triplet-neural-networks-2001.09988</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regression-based-music-emotion-prediction-using-triplet-neural-networks-2001.09988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regression-based-music-emotion-prediction-using-triplet-neural-networks-2001.09988"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-audio-input-representations-on-neural-network-based-music-transcription-2001.09989</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-audio-input-representations-on-neural-network-based-music-transcription-2001.09989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-audio-input-representations-on-neural-network-based-music-transcription-2001.09989"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-audio-source-separation-based-on-wave-u-net-combined-with-discrete-wavelet-transform-2001.10190</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-audio-source-separation-based-on-wave-u-net-combined-with-discrete-wavelet-transform-2001.10190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-audio-source-separation-based-on-wave-u-net-combined-with-discrete-wavelet-transform-2001.10190"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-reconstruction-in-rooms-inpainting-meets-super-resolution-2001.11263</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-reconstruction-in-rooms-inpainting-meets-super-resolution-2001.11263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-reconstruction-in-rooms-inpainting-meets-super-resolution-2001.11263"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-separation-dataset-and-analysis-2001.11482</loc><lastmod>2020-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-separation-dataset-and-analysis-2001.11482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-separation-dataset-and-analysis-2001.11482"/></url>
<url><loc>https://scifaro.com/en/abs/channel-attention-dense-u-net-for-multichannel-speech-enhancement-2001.11542</loc><lastmod>2020-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-attention-dense-u-net-for-multichannel-speech-enhancement-2001.11542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-attention-dense-u-net-for-multichannel-speech-enhancement-2001.11542"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-acoustic-modeling-using-mixed-bitrate-opus-compression-2002.00122</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-acoustic-modeling-using-mixed-bitrate-opus-compression-2002.00122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-acoustic-modeling-using-mixed-bitrate-opus-compression-2002.00122"/></url>
<url><loc>https://scifaro.com/en/abs/fully-learnable-front-end-for-multi-channel-acoustic-modeling-using-semi-supervised-learning-2002.00125</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-learnable-front-end-for-multi-channel-acoustic-modeling-using-semi-supervised-learning-2002.00125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-learnable-front-end-for-multi-channel-acoustic-modeling-using-semi-supervised-learning-2002.00125"/></url>
<url><loc>https://scifaro.com/en/abs/pop-music-transformer-beat-based-modeling-and-generation-of-expressive-pop-piano-compositions-2002.00212</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pop-music-transformer-beat-based-modeling-and-generation-of-expressive-pop-piano-compositions-2002.00212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pop-music-transformer-beat-based-modeling-and-generation-of-expressive-pop-piano-compositions-2002.00212"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-using-temporal-convolutional-recurrent-neural-networks-2002.00319</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-temporal-convolutional-recurrent-neural-networks-2002.00319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-temporal-convolutional-recurrent-neural-networks-2002.00319"/></url>
<url><loc>https://scifaro.com/en/abs/the-ffsvc-2020-evaluation-plan-2002.00387</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ffsvc-2020-evaluation-plan-2002.00387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ffsvc-2020-evaluation-plan-2002.00387"/></url>
<url><loc>https://scifaro.com/en/abs/dropclass-and-dropadapt-dropping-classes-for-deep-speaker-representation-learning-2002.00453</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dropclass-and-dropadapt-dropping-classes-for-deep-speaker-representation-learning-2002.00453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dropclass-and-dropadapt-dropping-classes-for-deep-speaker-representation-learning-2002.00453"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-with-depthwise-separable-and-dilated-convolutions-2002.00476</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-with-depthwise-separable-and-dilated-convolutions-2002.00476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-with-depthwise-separable-and-dilated-convolutions-2002.00476"/></url>
<url><loc>https://scifaro.com/en/abs/regularized-fast-multichannel-nonnegative-matrix-factorization-with-ilrma-based-prior-distribution-of-joint-diagonalization-process-2002.00579</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regularized-fast-multichannel-nonnegative-matrix-factorization-with-ilrma-based-prior-distribution-of-joint-diagonalization-process-2002.00579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regularized-fast-multichannel-nonnegative-matrix-factorization-with-ilrma-based-prior-distribution-of-joint-diagonalization-process-2002.00579"/></url>
<url><loc>https://scifaro.com/en/abs/oral-billiards-2002.00791</loc><lastmod>2021-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/oral-billiards-2002.00791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/oral-billiards-2002.00791"/></url>
<url><loc>https://scifaro.com/en/abs/limitations-of-weak-labels-for-embedding-and-tagging-2002.01687</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/limitations-of-weak-labels-for-embedding-and-tagging-2002.01687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/limitations-of-weak-labels-for-embedding-and-tagging-2002.01687"/></url>
<url><loc>https://scifaro.com/en/abs/source-separation-with-weakly-labelled-data-an-approach-to-computational-auditory-scene-analysis-2002.02065</loc><lastmod>2020-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-separation-with-weakly-labelled-data-an-approach-to-computational-auditory-scene-analysis-2002.02065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-separation-with-weakly-labelled-data-an-approach-to-computational-auditory-scene-analysis-2002.02065"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-melody-generation-via-disentangled-short-term-representations-and-structural-conditions-2002.02393</loc><lastmod>2020-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-melody-generation-via-disentangled-short-term-representations-and-structural-conditions-2002.02393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-melody-generation-via-disentangled-short-term-representations-and-structural-conditions-2002.02393"/></url>
<url><loc>https://scifaro.com/en/abs/robust-multi-channel-speech-recognition-using-frequency-aligned-network-2002.02520</loc><lastmod>2020-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-multi-channel-speech-recognition-using-frequency-aligned-network-2002.02520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-multi-channel-speech-recognition-using-frequency-aligned-network-2002.02520"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-musical-onset-probabilities-via-neural-distribution-learning-2002.03559</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-musical-onset-probabilities-via-neural-distribution-learning-2002.03559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-musical-onset-probabilities-via-neural-distribution-learning-2002.03559"/></url>
<url><loc>https://scifaro.com/en/abs/robust-deep-learning-framework-for-predicting-respiratory-anomalies-and-diseases-2002.03894</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-deep-learning-framework-for-predicting-respiratory-anomalies-and-diseases-2002.03894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-deep-learning-framework-for-predicting-respiratory-anomalies-and-diseases-2002.03894"/></url>
<url><loc>https://scifaro.com/en/abs/on-cross-corpus-generalization-of-deep-learning-based-speech-enhancement-2002.04027</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-cross-corpus-generalization-of-deep-learning-based-speech-enhancement-2002.04027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-cross-corpus-generalization-of-deep-learning-based-speech-enhancement-2002.04027"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-learning-of-audio-perception-for-robotics-applications-learning-to-project-data-to-t-sne-umap-space-2002.04076</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-learning-of-audio-perception-for-robotics-applications-learning-to-project-data-to-t-sne-umap-space-2002.04076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-learning-of-audio-perception-for-robotics-applications-learning-to-project-data-to-t-sne-umap-space-2002.04076"/></url>
<url><loc>https://scifaro.com/en/abs/robust-acoustic-scene-classification-using-a-multi-spectrogram-encoder-decoder-framework-2002.04502</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-acoustic-scene-classification-using-a-multi-spectrogram-encoder-decoder-framework-2002.04502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-acoustic-scene-classification-using-a-multi-spectrogram-encoder-decoder-framework-2002.04502"/></url>
<url><loc>https://scifaro.com/en/abs/cgcnn-complex-gabor-convolutional-neural-network-on-raw-speech-2002.04569</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cgcnn-complex-gabor-convolutional-neural-network-on-raw-speech-2002.04569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cgcnn-complex-gabor-convolutional-neural-network-on-raw-speech-2002.04569"/></url>
<url><loc>https://scifaro.com/en/abs/learning-with-out-of-distribution-data-for-audio-classification-2002.04683</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-with-out-of-distribution-data-for-audio-classification-2002.04683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-with-out-of-distribution-data-for-audio-classification-2002.04683"/></url>
<url><loc>https://scifaro.com/en/abs/periodicity-pitch-detection-in-complex-harmonies-on-eeg-timeline-data-2002.04990</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/periodicity-pitch-detection-in-complex-harmonies-on-eeg-timeline-data-2002.04990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/periodicity-pitch-detection-in-complex-harmonies-on-eeg-timeline-data-2002.04990"/></url>
<url><loc>https://scifaro.com/en/abs/deep-autotuner-a-pitch-correcting-network-for-singing-performances-2002.05511</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-autotuner-a-pitch-correcting-network-for-singing-performances-2002.05511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-autotuner-a-pitch-correcting-network-for-singing-performances-2002.05511"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-by-multitask-learning-of-sound-events-and-scenes-with-soft-scene-labels-2002.05848</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-by-multitask-learning-of-sound-events-and-scenes-with-soft-scene-labels-2002.05848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-by-multitask-learning-of-sound-events-and-scenes-with-soft-scene-labels-2002.05848"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-distributed-multichannel-mask-estimation-for-speech-enhancement-in-microphone-arrays-2002.06016</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-distributed-multichannel-mask-estimation-for-speech-enhancement-in-microphone-arrays-2002.06016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-distributed-multichannel-mask-estimation-for-speech-enhancement-in-microphone-arrays-2002.06016"/></url>
<url><loc>https://scifaro.com/en/abs/hodge-and-podge-hybrid-supervised-sound-event-detection-with-multi-hot-mixmatch-and-composition-consistence-training-2002.06021</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hodge-and-podge-hybrid-supervised-sound-event-detection-with-multi-hot-mixmatch-and-composition-consistence-training-2002.06021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hodge-and-podge-hybrid-supervised-sound-event-detection-with-multi-hot-mixmatch-and-composition-consistence-training-2002.06021"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speaker-embeddings-for-far-field-speaker-recognition-on-short-utterances-2002.06033</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speaker-embeddings-for-far-field-speaker-recognition-on-short-utterances-2002.06033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speaker-embeddings-for-far-field-speaker-recognition-on-short-utterances-2002.06033"/></url>
<url><loc>https://scifaro.com/en/abs/many-to-many-voice-conversion-using-conditional-cycle-consistent-adversarial-networks-2002.06328</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-using-conditional-cycle-consistent-adversarial-networks-2002.06328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-using-conditional-cycle-consistent-adversarial-networks-2002.06328"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-text-to-speech-system-via-joint-style-analysis-2002.06758</loc><lastmod>2020-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-text-to-speech-system-via-joint-style-analysis-2002.06758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-text-to-speech-system-via-joint-style-analysis-2002.06758"/></url>
<url><loc>https://scifaro.com/en/abs/lifter-training-and-sub-band-modeling-for-computationally-efficient-and-high-quality-voice-conversion-using-spectral-differentials-2002.06778</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lifter-training-and-sub-band-modeling-for-computationally-efficient-and-high-quality-voice-conversion-using-spectral-differentials-2002.06778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lifter-training-and-sub-band-modeling-for-computationally-efficient-and-high-quality-voice-conversion-using-spectral-differentials-2002.06778"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-the-confounds-of-accompaniments-in-singer-identification-2002.06817</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-the-confounds-of-accompaniments-in-singer-identification-2002.06817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-the-confounds-of-accompaniments-in-singer-identification-2002.06817"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-extractors-for-music-source-separation-2002.07016</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-extractors-for-music-source-separation-2002.07016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-extractors-for-music-source-separation-2002.07016"/></url>
<url><loc>https://scifaro.com/en/abs/performance-analysis-of-adaptive-noise-cancellation-for-speech-signal-2002.07677</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-analysis-of-adaptive-noise-cancellation-for-speech-signal-2002.07677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-analysis-of-adaptive-noise-cancellation-for-speech-signal-2002.07677"/></url>
<url><loc>https://scifaro.com/en/abs/convergence-guaranteed-independent-positive-semidefinite-tensor-analysis-based-on-student-s-t-distribution-2002.08582</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convergence-guaranteed-independent-positive-semidefinite-tensor-analysis-based-on-student-s-t-distribution-2002.08582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convergence-guaranteed-independent-positive-semidefinite-tensor-analysis-based-on-student-s-t-distribution-2002.08582"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-western-and-chinese-classical-music-based-on-soundscape-models-2002.09021</loc><lastmod>2020-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-western-and-chinese-classical-music-based-on-soundscape-models-2002.09021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-western-and-chinese-classical-music-based-on-soundscape-models-2002.09021"/></url>
<url><loc>https://scifaro.com/en/abs/decibel-improving-audio-chord-estimation-for-popular-music-by-alignment-and-integration-of-crowd-sourced-symbolic-representations-2002.09748</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decibel-improving-audio-chord-estimation-for-popular-music-by-alignment-and-integration-of-crowd-sourced-symbolic-representations-2002.09748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decibel-improving-audio-chord-estimation-for-popular-music-by-alignment-and-integration-of-crowd-sourced-symbolic-representations-2002.09748"/></url>
<url><loc>https://scifaro.com/en/abs/rhythm-chord-and-melody-generation-for-lead-sheets-using-recurrent-neural-networks-2002.10266</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythm-chord-and-melody-generation-for-lead-sheets-using-recurrent-neural-networks-2002.10266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythm-chord-and-melody-generation-for-lead-sheets-using-recurrent-neural-networks-2002.10266"/></url>
<url><loc>https://scifaro.com/en/abs/autofoley-artificial-synthesis-of-synchronized-sound-tracks-for-silent-videos-with-deep-learning-2002.10981</loc><lastmod>2020-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autofoley-artificial-synthesis-of-synchronized-sound-tracks-for-silent-videos-with-deep-learning-2002.10981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autofoley-artificial-synthesis-of-synchronized-sound-tracks-for-silent-videos-with-deep-learning-2002.10981"/></url>
<url><loc>https://scifaro.com/en/abs/rtmobile-beyond-real-time-mobile-acceleration-of-rnns-for-speech-recognition-2002.11474</loc><lastmod>2020-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rtmobile-beyond-real-time-mobile-acceleration-of-rnns-for-speech-recognition-2002.11474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rtmobile-beyond-real-time-mobile-acceleration-of-rnns-for-speech-recognition-2002.11474"/></url>
<url><loc>https://scifaro.com/en/abs/an-open-set-recognition-and-few-shot-learning-dataset-for-audio-event-classification-in-domestic-environments-2002.11561</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-open-set-recognition-and-few-shot-learning-dataset-for-audio-event-classification-in-domestic-environments-2002.11561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-open-set-recognition-and-few-shot-learning-dataset-for-audio-event-classification-in-domestic-environments-2002.11561"/></url>
<url><loc>https://scifaro.com/en/abs/harmonics-based-representation-in-clarinet-tone-quality-evaluation-2003.00414</loc><lastmod>2020-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonics-based-representation-in-clarinet-tone-quality-evaluation-2003.00414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonics-based-representation-in-clarinet-tone-quality-evaluation-2003.00414"/></url>
<url><loc>https://scifaro.com/en/abs/one-or-two-components-the-scattering-transform-answers-2003.01037</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-or-two-components-the-scattering-transform-answers-2003.01037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-or-two-components-the-scattering-transform-answers-2003.01037"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-network-based-framework-for-archetypical-sound-synthesis-2003.03160</loc><lastmod>2020-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-network-based-framework-for-archetypical-sound-synthesis-2003.03160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-network-based-framework-for-archetypical-sound-synthesis-2003.03160"/></url>
<url><loc>https://scifaro.com/en/abs/wavelet-based-spatial-audio-framework-2003.03287</loc><lastmod>2020-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavelet-based-spatial-audio-framework-2003.03287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavelet-based-spatial-audio-framework-2003.03287"/></url>
<url><loc>https://scifaro.com/en/abs/high-resolution-speaker-counting-in-reverberant-rooms-using-crnn-with-ambisonics-features-2003.07839</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-resolution-speaker-counting-in-reverberant-rooms-using-crnn-with-ambisonics-features-2003.07839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-resolution-speaker-counting-in-reverberant-rooms-using-crnn-with-ambisonics-features-2003.07839"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-cross-corpus-speech-emotion-recognition-2003.07996</loc><lastmod>2020-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-cross-corpus-speech-emotion-recognition-2003.07996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-cross-corpus-speech-emotion-recognition-2003.07996"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-doa-estimation-through-pattern-recognition-of-the-modal-coherence-of-a-reverberant-soundfield-2003.08050</loc><lastmod>2020-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-doa-estimation-through-pattern-recognition-of-the-modal-coherence-of-a-reverberant-soundfield-2003.08050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-doa-estimation-through-pattern-recognition-of-the-modal-coherence-of-a-reverberant-soundfield-2003.08050"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-replay-attacks-using-multi-channel-audio-a-neural-network-based-method-2003.08225</loc><lastmod>2020-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-replay-attacks-using-multi-channel-audio-a-neural-network-based-method-2003.08225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-replay-attacks-using-multi-channel-audio-a-neural-network-based-method-2003.08225"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-with-squeeze-excitation-residual-networks-2003.09284</loc><lastmod>2020-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-with-squeeze-excitation-residual-networks-2003.09284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-with-squeeze-excitation-residual-networks-2003.09284"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-inherent-properties-of-the-monophonic-melody-of-songs-2003.09287</loc><lastmod>2020-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-inherent-properties-of-the-monophonic-melody-of-songs-2003.09287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-inherent-properties-of-the-monophonic-melody-of-songs-2003.09287"/></url>
<url><loc>https://scifaro.com/en/abs/a-quantum-vocal-theory-of-sound-2003.09632</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-quantum-vocal-theory-of-sound-2003.09632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-quantum-vocal-theory-of-sound-2003.09632"/></url>
<url><loc>https://scifaro.com/en/abs/a-time-domain-monaural-speech-enhancement-with-feedback-learning-2003.09815</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-time-domain-monaural-speech-enhancement-with-feedback-learning-2003.09815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-time-domain-monaural-speech-enhancement-with-feedback-learning-2003.09815"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-u-net-for-music-source-separation-2003.10414</loc><lastmod>2020-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-u-net-for-music-source-separation-2003.10414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-u-net-for-music-source-separation-2003.10414"/></url>
<url><loc>https://scifaro.com/en/abs/bulbar-als-detection-based-on-analysis-of-voice-perturbation-and-vibrato-2003.10806</loc><lastmod>2020-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bulbar-als-detection-based-on-analysis-of-voice-perturbation-and-vibrato-2003.10806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bulbar-als-detection-based-on-analysis-of-voice-perturbation-and-vibrato-2003.10806"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-and-computer-audition-an-overview-on-what-speech-sound-analysis-could-contribute-in-the-sars-cov-2-corona-crisis-2003.11117</loc><lastmod>2020-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-and-computer-audition-an-overview-on-what-speech-sound-analysis-could-contribute-in-the-sars-cov-2-corona-crisis-2003.11117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-and-computer-audition-an-overview-on-what-speech-sound-analysis-could-contribute-in-the-sars-cov-2-corona-crisis-2003.11117"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-via-weakly-supervised-sound-event-detection-2003.12222</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-via-weakly-supervised-sound-event-detection-2003.12222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-via-weakly-supervised-sound-event-detection-2003.12222"/></url>
<url><loc>https://scifaro.com/en/abs/a-recursive-network-with-dynamic-attention-for-monaural-speech-enhancement-2003.12973</loc><lastmod>2020-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-recursive-network-with-dynamic-attention-for-monaural-speech-enhancement-2003.12973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-recursive-network-with-dynamic-attention-for-monaural-speech-enhancement-2003.12973"/></url>
<url><loc>https://scifaro.com/en/abs/am-mobilenet1d-a-portable-model-for-speaker-recognition-2004.00132</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/am-mobilenet1d-a-portable-model-for-speaker-recognition-2004.00132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/am-mobilenet1d-a-portable-model-for-speaker-recognition-2004.00132"/></url>
<url><loc>https://scifaro.com/en/abs/improving-perceptual-quality-of-drum-transcription-with-the-expanded-groove-midi-dataset-2004.00188</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-perceptual-quality-of-drum-transcription-with-the-expanded-groove-midi-dataset-2004.00188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-perceptual-quality-of-drum-transcription-with-the-expanded-groove-midi-dataset-2004.00188"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-video-to-audio-transformation-using-deep-recurrent-neural-networks-and-a-neuro-fuzzy-system-2004.02113</loc><lastmod>2020-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-video-to-audio-transformation-using-deep-recurrent-neural-networks-and-a-neuro-fuzzy-system-2004.02113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-video-to-audio-transformation-using-deep-recurrent-neural-networks-and-a-neuro-fuzzy-system-2004.02113"/></url>
<url><loc>https://scifaro.com/en/abs/conditioned-source-separation-for-music-instrument-performances-2004.03873</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioned-source-separation-for-music-instrument-performances-2004.03873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioned-source-separation-for-music-instrument-performances-2004.03873"/></url>
<url><loc>https://scifaro.com/en/abs/gga-mg-generative-genetic-algorithm-for-music-generation-2004.04687</loc><lastmod>2020-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gga-mg-generative-genetic-algorithm-for-music-generation-2004.04687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gga-mg-generative-genetic-algorithm-for-music-generation-2004.04687"/></url>
<url><loc>https://scifaro.com/en/abs/musical-features-for-automatic-music-transcription-evaluation-2004.07171</loc><lastmod>2020-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-features-for-automatic-music-transcription-evaluation-2004.07171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-features-for-automatic-music-transcription-evaluation-2004.07171"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-in-bengali-language-from-nonlinear-features-2004.07820</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-in-bengali-language-from-nonlinear-features-2004.07820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-in-bengali-language-from-nonlinear-features-2004.07820"/></url>
<url><loc>https://scifaro.com/en/abs/beat-detection-and-automatic-annotation-of-the-music-of-bharatanatyam-dance-using-speech-recognition-techniques-2004.08269</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beat-detection-and-automatic-annotation-of-the-music-of-bharatanatyam-dance-using-speech-recognition-techniques-2004.08269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beat-detection-and-automatic-annotation-of-the-music-of-bharatanatyam-dance-using-speech-recognition-techniques-2004.08269"/></url>
<url><loc>https://scifaro.com/en/abs/chime-6-challenge-tackling-multispeaker-speech-recognition-for-unsegmented-recordings-2004.09249</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chime-6-challenge-tackling-multispeaker-speech-recognition-for-unsegmented-recordings-2004.09249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chime-6-challenge-tackling-multispeaker-speech-recognition-for-unsegmented-recordings-2004.09249"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-to-dialog-act-recognition-2004.11419</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-to-dialog-act-recognition-2004.11419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-to-dialog-act-recognition-2004.11419"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-trained-transformers-models-for-spoken-language-translation-2004.12111</loc><lastmod>2020-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-trained-transformers-models-for-spoken-language-translation-2004.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-trained-transformers-models-for-spoken-language-translation-2004.12111"/></url>
<url><loc>https://scifaro.com/en/abs/depthwise-separable-convolutional-resnet-with-squeeze-and-excitation-blocks-for-small-footprint-keyword-spotting-2004.12200</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/depthwise-separable-convolutional-resnet-with-squeeze-and-excitation-blocks-for-small-footprint-keyword-spotting-2004.12200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/depthwise-separable-convolutional-resnet-with-squeeze-and-excitation-blocks-for-small-footprint-keyword-spotting-2004.12200"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-feature-learning-and-unsupervised-clustering-based-speech-synthesis-for-found-data-with-acoustic-and-textual-noise-2004.13595</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-feature-learning-and-unsupervised-clustering-based-speech-synthesis-for-found-data-with-acoustic-and-textual-noise-2004.13595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-feature-learning-and-unsupervised-clustering-based-speech-synthesis-for-found-data-with-acoustic-and-textual-noise-2004.13595"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-voices-and-hearing-voices-learning-discriminative-embeddings-using-cross-modal-self-supervision-2004.14326</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-voices-and-hearing-voices-learning-discriminative-embeddings-using-cross-modal-self-supervision-2004.14326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-voices-and-hearing-voices-learning-discriminative-embeddings-using-cross-modal-self-supervision-2004.14326"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-missing-labels-in-large-scale-sound-event-recognition-using-a-teacher-student-framework-with-loss-masking-2005.00878</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-missing-labels-in-large-scale-sound-event-recognition-using-a-teacher-student-framework-with-loss-masking-2005.00878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-missing-labels-in-large-scale-sound-event-recognition-using-a-teacher-student-framework-with-loss-masking-2005.00878"/></url>
<url><loc>https://scifaro.com/en/abs/dual-track-music-generation-using-deep-learning-2005.04353</loc><lastmod>2020-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-track-music-generation-using-deep-learning-2005.04353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-track-music-generation-using-deep-learning-2005.04353"/></url>
<url><loc>https://scifaro.com/en/abs/cognitive-driven-convolutional-beamforming-using-eeg-based-auditory-attention-decoding-2005.04669</loc><lastmod>2020-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cognitive-driven-convolutional-beamforming-using-eeg-based-auditory-attention-decoding-2005.04669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cognitive-driven-convolutional-beamforming-using-eeg-based-auditory-attention-decoding-2005.04669"/></url>
<url><loc>https://scifaro.com/en/abs/chirp-complex-cepstrum-based-decomposition-for-asynchronous-glottal-analysis-2005.04724</loc><lastmod>2020-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chirp-complex-cepstrum-based-decomposition-for-asynchronous-glottal-analysis-2005.04724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chirp-complex-cepstrum-based-decomposition-for-asynchronous-glottal-analysis-2005.04724"/></url>
<url><loc>https://scifaro.com/en/abs/gacela-a-generative-adversarial-context-encoder-for-long-audio-inpainting-2005.05032</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gacela-a-generative-adversarial-context-encoder-for-long-audio-inpainting-2005.05032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gacela-a-generative-adversarial-context-encoder-for-long-audio-inpainting-2005.05032"/></url>
<url><loc>https://scifaro.com/en/abs/online-monaural-speech-enhancement-using-delayed-subband-lstm-2005.05037</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-monaural-speech-enhancement-using-delayed-subband-lstm-2005.05037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-monaural-speech-enhancement-using-delayed-subband-lstm-2005.05037"/></url>
<url><loc>https://scifaro.com/en/abs/multi-band-melgan-faster-waveform-generation-for-high-quality-text-to-speech-2005.05106</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-band-melgan-faster-waveform-generation-for-high-quality-text-to-speech-2005.05106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-band-melgan-faster-waveform-generation-for-high-quality-text-to-speech-2005.05106"/></url>
<url><loc>https://scifaro.com/en/abs/featherwave-an-efficient-high-fidelity-neural-vocoder-with-multi-band-linear-prediction-2005.05551</loc><lastmod>2020-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/featherwave-an-efficient-high-fidelity-neural-vocoder-with-multi-band-linear-prediction-2005.05551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/featherwave-an-efficient-high-fidelity-neural-vocoder-with-multi-band-linear-prediction-2005.05551"/></url>
<url><loc>https://scifaro.com/en/abs/adadurian-few-shot-adaptation-for-neural-text-to-speech-with-durian-2005.05642</loc><lastmod>2020-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adadurian-few-shot-adaptation-for-neural-text-to-speech-with-durian-2005.05642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adadurian-few-shot-adaptation-for-neural-text-to-speech-with-durian-2005.05642"/></url>
<url><loc>https://scifaro.com/en/abs/creative-quantum-computing-inverse-fft-sound-synthesis-adaptive-sequencing-and-musical-composition-2005.05832</loc><lastmod>2021-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creative-quantum-computing-inverse-fft-sound-synthesis-adaptive-sequencing-and-musical-composition-2005.05832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creative-quantum-computing-inverse-fft-sound-synthesis-adaptive-sequencing-and-musical-composition-2005.05832"/></url>
<url><loc>https://scifaro.com/en/abs/the-ioa-system-for-deep-noise-suppression-challenge-using-a-framework-combining-dynamic-attention-and-recursive-learning-2005.05855</loc><lastmod>2020-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ioa-system-for-deep-noise-suppression-challenge-using-a-framework-combining-dynamic-attention-and-recursive-learning-2005.05855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ioa-system-for-deep-noise-suppression-challenge-using-a-framework-combining-dynamic-attention-and-recursive-learning-2005.05855"/></url>
<url><loc>https://scifaro.com/en/abs/flowtron-an-autoregressive-flow-based-generative-network-for-text-to-speech-synthesis-2005.05957</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowtron-an-autoregressive-flow-based-generative-network-for-text-to-speech-synthesis-2005.05957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowtron-an-autoregressive-flow-based-generative-network-for-text-to-speech-synthesis-2005.05957"/></url>
<url><loc>https://scifaro.com/en/abs/converting-anyone-s-emotion-towards-speaker-independent-emotional-voice-conversion-2005.07025</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/converting-anyone-s-emotion-towards-speaker-independent-emotional-voice-conversion-2005.07025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/converting-anyone-s-emotion-towards-speaker-independent-emotional-voice-conversion-2005.07025"/></url>
<url><loc>https://scifaro.com/en/abs/facefilter-audio-visual-speech-separation-using-still-images-2005.07074</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/facefilter-audio-visual-speech-separation-using-still-images-2005.07074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/facefilter-audio-visual-speech-separation-using-still-images-2005.07074"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-neural-chord-estimation-based-on-a-variational-autoencoder-with-latent-chord-labels-and-features-2005.07091</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-neural-chord-estimation-based-on-a-variational-autoencoder-with-latent-chord-labels-and-features-2005.07091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-neural-chord-estimation-based-on-a-variational-autoencoder-with-latent-chord-labels-and-features-2005.07091"/></url>
<url><loc>https://scifaro.com/en/abs/reverberation-modeling-for-source-filter-based-neural-vocoder-2005.07379</loc><lastmod>2020-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberation-modeling-for-source-filter-based-neural-vocoder-2005.07379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberation-modeling-for-source-filter-based-neural-vocoder-2005.07379"/></url>
<url><loc>https://scifaro.com/en/abs/glottal-source-estimation-using-an-automatic-chirp-decomposition-2005.07897</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glottal-source-estimation-using-an-automatic-chirp-decomposition-2005.07897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glottal-source-estimation-using-an-automatic-chirp-decomposition-2005.07897"/></url>
<url><loc>https://scifaro.com/en/abs/oscillating-statistical-moments-for-speech-polarity-detection-2005.07901</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/oscillating-statistical-moments-for-speech-polarity-detection-2005.07901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/oscillating-statistical-moments-for-speech-polarity-detection-2005.07901"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-scheme-by-combining-dnn-model-with-gmm-model-2005.08184</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-scheme-by-combining-dnn-model-with-gmm-model-2005.08184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-scheme-by-combining-dnn-model-with-gmm-model-2005.08184"/></url>
<url><loc>https://scifaro.com/en/abs/augmenting-generative-adversarial-networks-for-speech-emotion-recognition-2005.08447</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmenting-generative-adversarial-networks-for-speech-emotion-recognition-2005.08447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmenting-generative-adversarial-networks-for-speech-emotion-recognition-2005.08447"/></url>
<url><loc>https://scifaro.com/en/abs/deep-architecture-enhancing-robustness-to-noise-adversarial-attacks-and-cross-corpus-setting-for-speech-emotion-recognition-2005.08453</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-architecture-enhancing-robustness-to-noise-adversarial-attacks-and-cross-corpus-setting-for-speech-emotion-recognition-2005.08453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-architecture-enhancing-robustness-to-noise-adversarial-attacks-and-cross-corpus-setting-for-speech-emotion-recognition-2005.08453"/></url>
<url><loc>https://scifaro.com/en/abs/surfboard-audio-feature-extraction-for-modern-machine-learning-2005.08848</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surfboard-audio-feature-extraction-for-modern-machine-learning-2005.08848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surfboard-audio-feature-extraction-for-modern-machine-learning-2005.08848"/></url>
<url><loc>https://scifaro.com/en/abs/saving-the-sonorine-photovisual-audio-recovery-using-image-processing-and-computer-vision-techniques-2005.08944</loc><lastmod>2020-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saving-the-sonorine-photovisual-audio-recovery-using-image-processing-and-computer-vision-techniques-2005.08944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saving-the-sonorine-photovisual-audio-recovery-using-image-processing-and-computer-vision-techniques-2005.08944"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-echo-cancellation-by-combining-adaptive-digital-filter-and-recurrent-neural-network-2005.09237</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-by-combining-adaptive-digital-filter-and-recurrent-neural-network-2005.09237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-by-combining-adaptive-digital-filter-and-recurrent-neural-network-2005.09237"/></url>
<url><loc>https://scifaro.com/en/abs/a-lite-microphone-array-beamforming-scheme-with-maximum-signal-to-noise-ratio-filter-2005.09238</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lite-microphone-array-beamforming-scheme-with-maximum-signal-to-noise-ratio-filter-2005.09238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lite-microphone-array-beamforming-scheme-with-maximum-signal-to-noise-ratio-filter-2005.09238"/></url>
<url><loc>https://scifaro.com/en/abs/competitive-wakeup-scheme-for-distributed-devices-2005.09242</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/competitive-wakeup-scheme-for-distributed-devices-2005.09242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/competitive-wakeup-scheme-for-distributed-devices-2005.09242"/></url>
<url><loc>https://scifaro.com/en/abs/saddel-joint-speech-separation-and-denoising-model-based-on-multitask-learning-2005.09966</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saddel-joint-speech-separation-and-denoising-model-based-on-multitask-learning-2005.09966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saddel-joint-speech-separation-and-denoising-model-based-on-multitask-learning-2005.09966"/></url>
<url><loc>https://scifaro.com/en/abs/sparsity-based-audio-declipping-methods-selected-overview-new-algorithms-and-large-scale-evaluation-2005.10228</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsity-based-audio-declipping-methods-selected-overview-new-algorithms-and-large-scale-evaluation-2005.10228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsity-based-audio-declipping-methods-selected-overview-new-algorithms-and-large-scale-evaluation-2005.10228"/></url>
<url><loc>https://scifaro.com/en/abs/conversational-end-to-end-tts-for-voice-agent-2005.10438</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conversational-end-to-end-tts-for-voice-agent-2005.10438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conversational-end-to-end-tts-for-voice-agent-2005.10438"/></url>
<url><loc>https://scifaro.com/en/abs/simplified-self-attention-for-transformer-based-end-to-end-speech-recognition-2005.10463</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simplified-self-attention-for-transformer-based-end-to-end-speech-recognition-2005.10463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simplified-self-attention-for-transformer-based-end-to-end-speech-recognition-2005.10463"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-interpretable-deep-learning-classifier-for-heart-anomaly-detection-without-segmentation-2005.10480</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-interpretable-deep-learning-classifier-for-heart-anomaly-detection-without-segmentation-2005.10480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-interpretable-deep-learning-classifier-for-heart-anomaly-detection-without-segmentation-2005.10480"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-to-beethoven-s-10th-symphony-2005.10539</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-to-beethoven-s-10th-symphony-2005.10539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-to-beethoven-s-10th-symphony-2005.10539"/></url>
<url><loc>https://scifaro.com/en/abs/inaudible-adversarial-perturbations-for-targeted-attack-in-speaker-recognition-2005.10637</loc><lastmod>2020-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inaudible-adversarial-perturbations-for-targeted-attack-in-speaker-recognition-2005.10637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inaudible-adversarial-perturbations-for-targeted-attack-in-speaker-recognition-2005.10637"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-evaluation-of-importance-maps-in-automatic-speech-recognition-2005.10929</loc><lastmod>2020-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-evaluation-of-importance-maps-in-automatic-speech-recognition-2005.10929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-evaluation-of-importance-maps-in-automatic-speech-recognition-2005.10929"/></url>
<url><loc>https://scifaro.com/en/abs/power-pooling-operators-and-confidence-learning-for-semi-supervised-sound-event-detection-2005.11459</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/power-pooling-operators-and-confidence-learning-for-semi-supervised-sound-event-detection-2005.11459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/power-pooling-operators-and-confidence-learning-for-semi-supervised-sound-event-detection-2005.11459"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-auditory-object-recognition-via-inception-nucleus-2005.12195</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-auditory-object-recognition-via-inception-nucleus-2005.12195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-auditory-object-recognition-via-inception-nucleus-2005.12195"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-posture-classification-using-instantaneous-intraspeech-breathing-features-2005.12230</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-posture-classification-using-instantaneous-intraspeech-breathing-features-2005.12230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-posture-classification-using-instantaneous-intraspeech-breathing-features-2005.12230"/></url>
<url><loc>https://scifaro.com/en/abs/infantnet-a-deep-neural-network-for-analyzing-infant-vocalizations-2005.12412</loc><lastmod>2020-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infantnet-a-deep-neural-network-for-analyzing-infant-vocalizations-2005.12412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infantnet-a-deep-neural-network-for-analyzing-infant-vocalizations-2005.12412"/></url>
<url><loc>https://scifaro.com/en/abs/sound-context-classification-basing-on-join-learning-model-and-multi-spectrogram-features-2005.12779</loc><lastmod>2020-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-context-classification-basing-on-join-learning-model-and-multi-spectrogram-features-2005.12779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-context-classification-basing-on-join-learning-model-and-multi-spectrogram-features-2005.12779"/></url>
<url><loc>https://scifaro.com/en/abs/tonal-harmony-and-the-topology-of-dynamical-score-networks-2006.01033</loc><lastmod>2021-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tonal-harmony-and-the-topology-of-dynamical-score-networks-2006.01033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tonal-harmony-and-the-topology-of-dynamical-score-networks-2006.01033"/></url>
<url><loc>https://scifaro.com/en/abs/an-asr-guided-speech-intelligibility-measure-for-tts-model-selection-2006.01463</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-asr-guided-speech-intelligibility-measure-for-tts-model-selection-2006.01463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-asr-guided-speech-intelligibility-measure-for-tts-model-selection-2006.01463"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-chunk-aware-multihead-attention-for-online-end-to-end-speech-recognition-2006.01712</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-chunk-aware-multihead-attention-for-online-end-to-end-speech-recognition-2006.01712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-chunk-aware-multihead-attention-for-online-end-to-end-speech-recognition-2006.01712"/></url>
<url><loc>https://scifaro.com/en/abs/san-m-memory-equipped-self-attention-for-end-to-end-speech-recognition-2006.01713</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/san-m-memory-equipped-self-attention-for-end-to-end-speech-recognition-2006.01713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/san-m-memory-equipped-self-attention-for-end-to-end-speech-recognition-2006.01713"/></url>
<url><loc>https://scifaro.com/en/abs/cross-entropy-as-objective-function-for-music-generative-models-2006.02217</loc><lastmod>2020-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-entropy-as-objective-function-for-music-generative-models-2006.02217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-entropy-as-objective-function-for-music-generative-models-2006.02217"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-more-realistic-room-simulation-for-far-field-keyword-spotting-2006.02774</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-more-realistic-room-simulation-for-far-field-keyword-spotting-2006.02774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-more-realistic-room-simulation-for-far-field-keyword-spotting-2006.02774"/></url>
<url><loc>https://scifaro.com/en/abs/pjs-phoneme-balanced-japanese-singing-voice-corpus-2006.02959</loc><lastmod>2020-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pjs-phoneme-balanced-japanese-singing-voice-corpus-2006.02959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pjs-phoneme-balanced-japanese-singing-voice-corpus-2006.02959"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-method-towards-speech-files-local-features-investigation-2006.03388</loc><lastmod>2020-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-method-towards-speech-files-local-features-investigation-2006.03388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-method-towards-speech-files-local-features-investigation-2006.03388"/></url>
<url><loc>https://scifaro.com/en/abs/audio-captioning-using-gated-recurrent-units-2006.03391</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-captioning-using-gated-recurrent-units-2006.03391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-captioning-using-gated-recurrent-units-2006.03391"/></url>
<url><loc>https://scifaro.com/en/abs/application-of-optimization-and-simulation-to-musical-composition-that-emerges-dynamically-during-ensemble-singing-performance-2006.03471</loc><lastmod>2020-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/application-of-optimization-and-simulation-to-musical-composition-that-emerges-dynamically-during-ensemble-singing-performance-2006.03471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/application-of-optimization-and-simulation-to-musical-composition-that-emerges-dynamically-during-ensemble-singing-performance-2006.03471"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-adversarial-text-to-speech-2006.03575</loc><lastmod>2021-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-adversarial-text-to-speech-2006.03575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-adversarial-text-to-speech-2006.03575"/></url>
<url><loc>https://scifaro.com/en/abs/wavenode-a-continuous-normalizing-flow-for-speech-synthesis-2006.04598</loc><lastmod>2020-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavenode-a-continuous-normalizing-flow-for-speech-synthesis-2006.04598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavenode-a-continuous-normalizing-flow-for-speech-synthesis-2006.04598"/></url>
<url><loc>https://scifaro.com/en/abs/the-hitchhiker-s-guide-to-the-all-interval-12-tone-rows-2006.05007</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hitchhiker-s-guide-to-the-all-interval-12-tone-rows-2006.05007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hitchhiker-s-guide-to-the-all-interval-12-tone-rows-2006.05007"/></url>
<url><loc>https://scifaro.com/en/abs/c-sl-contrastive-sound-localization-with-inertial-acoustic-sensors-2006.05071</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/c-sl-contrastive-sound-localization-with-inertial-acoustic-sensors-2006.05071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/c-sl-contrastive-sound-localization-with-inertial-acoustic-sensors-2006.05071"/></url>
<url><loc>https://scifaro.com/en/abs/audino-a-modern-annotation-tool-for-audio-and-speech-2006.05236</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audino-a-modern-annotation-tool-for-audio-and-speech-2006.05236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audino-a-modern-annotation-tool-for-audio-and-speech-2006.05236"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-automatic-diagnosis-of-covid-19-from-crowdsourced-respiratory-sound-data-2006.05919</loc><lastmod>2021-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-automatic-diagnosis-of-covid-19-from-crowdsourced-respiratory-sound-data-2006.05919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-automatic-diagnosis-of-covid-19-from-crowdsourced-respiratory-sound-data-2006.05919"/></url>
<url><loc>https://scifaro.com/en/abs/perceiving-music-quality-with-gans-2006.06287</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceiving-music-quality-with-gans-2006.06287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceiving-music-quality-with-gans-2006.06287"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-attention-based-generative-adversarial-network-with-phase-post-processing-for-speech-enhancement-2006.07530</loc><lastmod>2020-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-attention-based-generative-adversarial-network-with-phase-post-processing-for-speech-enhancement-2006.07530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-attention-based-generative-adversarial-network-with-phase-post-processing-for-speech-enhancement-2006.07530"/></url>
<url><loc>https://scifaro.com/en/abs/assisted-music-creation-with-flow-machines-towards-new-categories-of-new-2006.09232</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assisted-music-creation-with-flow-machines-towards-new-categories-of-new-2006.09232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assisted-music-creation-with-flow-machines-towards-new-categories-of-new-2006.09232"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-visualisation-of-fugue-played-by-a-string-quartet-2006.10168</loc><lastmod>2020-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-visualisation-of-fugue-played-by-a-string-quartet-2006.10168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-visualisation-of-fugue-played-by-a-string-quartet-2006.10168"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-musical-intelligence-a-survey-2006.10553</loc><lastmod>2020-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-musical-intelligence-a-survey-2006.10553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-musical-intelligence-a-survey-2006.10553"/></url>
<url><loc>https://scifaro.com/en/abs/musical-smart-city-perspectives-on-ubiquitous-sonification-2006.12305</loc><lastmod>2020-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-smart-city-perspectives-on-ubiquitous-sonification-2006.12305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-smart-city-perspectives-on-ubiquitous-sonification-2006.12305"/></url>
<url><loc>https://scifaro.com/en/abs/bach-or-mock-a-grading-function-for-chorales-in-the-style-of-j-s-bach-2006.13329</loc><lastmod>2020-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bach-or-mock-a-grading-function-for-chorales-in-the-style-of-j-s-bach-2006.13329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bach-or-mock-a-grading-function-for-chorales-in-the-style-of-j-s-bach-2006.13329"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-music-knowledge-in-continual-dataset-augmentation-for-music-generation-2006.13331</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-music-knowledge-in-continual-dataset-augmentation-for-music-generation-2006.13331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-music-knowledge-in-continual-dataset-augmentation-for-music-generation-2006.13331"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-baroque-two-part-counterpoint-with-neural-machine-translation-2006.14221</loc><lastmod>2022-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-baroque-two-part-counterpoint-with-neural-machine-translation-2006.14221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-baroque-two-part-counterpoint-with-neural-machine-translation-2006.14221"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2006.14436</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2006.14436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2006.14436"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-using-duration-robust-loss-function-2006.15253</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-using-duration-robust-loss-function-2006.15253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-using-duration-robust-loss-function-2006.15253"/></url>
<url><loc>https://scifaro.com/en/abs/beneath-or-beyond-the-surface-discovering-voice-leading-patterns-with-skip-grams-2006.15399</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beneath-or-beyond-the-surface-discovering-voice-leading-patterns-with-skip-grams-2006.15399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beneath-or-beyond-the-surface-discovering-voice-leading-patterns-with-skip-grams-2006.15399"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-versus-noise-compensation-for-x-vector-speaker-recognition-systems-in-noisy-environments-2006.15903</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-versus-noise-compensation-for-x-vector-speaker-recognition-systems-in-noisy-environments-2006.15903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-versus-noise-compensation-for-x-vector-speaker-recognition-systems-in-noisy-environments-2006.15903"/></url>
<url><loc>https://scifaro.com/en/abs/a-sequential-self-teaching-approach-for-improving-generalization-in-sound-event-recognition-2007.00144</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sequential-self-teaching-approach-for-improving-generalization-in-sound-event-recognition-2007.00144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sequential-self-teaching-approach-for-improving-generalization-in-sound-event-recognition-2007.00144"/></url>
<url><loc>https://scifaro.com/en/abs/consistent-independent-low-rank-matrix-analysis-for-determined-blind-source-separation-2007.00274</loc><lastmod>2020-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistent-independent-low-rank-matrix-analysis-for-determined-blind-source-separation-2007.00274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistent-independent-low-rank-matrix-analysis-for-determined-blind-source-separation-2007.00274"/></url>
<url><loc>https://scifaro.com/en/abs/joint-diagonalizability-constrained-multichannel-nonnegative-matrix-factorization-based-on-multivariate-complex-sub-gaussian-distribution-2007.00416</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-diagonalizability-constrained-multichannel-nonnegative-matrix-factorization-based-on-multivariate-complex-sub-gaussian-distribution-2007.00416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-diagonalizability-constrained-multichannel-nonnegative-matrix-factorization-based-on-multivariate-complex-sub-gaussian-distribution-2007.00416"/></url>
<url><loc>https://scifaro.com/en/abs/orchideasol-a-dataset-of-extended-instrumental-techniques-for-computer-aided-orchestration-2007.00763</loc><lastmod>2020-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/orchideasol-a-dataset-of-extended-instrumental-techniques-for-computer-aided-orchestration-2007.00763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/orchideasol-a-dataset-of-extended-instrumental-techniques-for-computer-aided-orchestration-2007.00763"/></url>
<url><loc>https://scifaro.com/en/abs/spot-the-conversation-speaker-diarisation-in-the-wild-2007.01216</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spot-the-conversation-speaker-diarisation-in-the-wild-2007.01216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spot-the-conversation-speaker-diarisation-in-the-wild-2007.01216"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-representation-learning-for-singing-voice-separation-with-sinkhorn-distances-2007.02780</loc><lastmod>2021-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-representation-learning-for-singing-voice-separation-with-sinkhorn-distances-2007.02780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-representation-learning-for-singing-voice-separation-with-sinkhorn-distances-2007.02780"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-with-spectrogram-processing-strategies-2007.03781</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-with-spectrogram-processing-strategies-2007.03781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-with-spectrogram-processing-strategies-2007.03781"/></url>
<url><loc>https://scifaro.com/en/abs/training-sound-event-detection-on-a-heterogeneous-dataset-2007.03931</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-sound-event-detection-on-a-heterogeneous-dataset-2007.03931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-sound-event-detection-on-a-heterogeneous-dataset-2007.03931"/></url>
<url><loc>https://scifaro.com/en/abs/improving-sound-event-detection-in-domestic-environments-using-sound-separation-2007.03932</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-sound-event-detection-in-domestic-environments-using-sound-separation-2007.03932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-sound-event-detection-in-domestic-environments-using-sound-separation-2007.03932"/></url>
<url><loc>https://scifaro.com/en/abs/information-communication-and-music-recognition-of-musical-dissonance-and-consonance-in-a-simple-reservoir-computing-system-2007.04360</loc><lastmod>2020-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/information-communication-and-music-recognition-of-musical-dissonance-and-consonance-in-a-simple-reservoir-computing-system-2007.04360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/information-communication-and-music-recognition-of-musical-dissonance-and-consonance-in-a-simple-reservoir-computing-system-2007.04360"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-regularization-based-on-infrequent-classes-for-audio-captioning-2007.04660</loc><lastmod>2020-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-regularization-based-on-infrequent-classes-for-audio-captioning-2007.04660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-regularization-based-on-infrequent-classes-for-audio-captioning-2007.04660"/></url>
<url><loc>https://scifaro.com/en/abs/rwcp-ssd-onomatopoeia-onomatopoeic-word-dataset-for-environmental-sound-synthesis-2007.04719</loc><lastmod>2020-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rwcp-ssd-onomatopoeia-onomatopoeic-word-dataset-for-environmental-sound-synthesis-2007.04719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rwcp-ssd-onomatopoeia-onomatopoeic-word-dataset-for-environmental-sound-synthesis-2007.04719"/></url>
<url><loc>https://scifaro.com/en/abs/conditioned-time-dilated-convolutions-for-sound-event-detection-2007.05183</loc><lastmod>2020-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioned-time-dilated-convolutions-for-sound-event-detection-2007.05183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioned-time-dilated-convolutions-for-sound-event-detection-2007.05183"/></url>
<url><loc>https://scifaro.com/en/abs/overcoming-label-noise-in-audio-event-detection-using-sequential-labeling-2007.05191</loc><lastmod>2020-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overcoming-label-noise-in-audio-event-detection-using-sequential-labeling-2007.05191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overcoming-label-noise-in-audio-event-detection-using-sequential-labeling-2007.05191"/></url>
<url><loc>https://scifaro.com/en/abs/otoworld-towards-learning-to-separate-by-learning-to-move-2007.06123</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/otoworld-towards-learning-to-separate-by-learning-to-move-2007.06123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/otoworld-towards-learning-to-separate-by-learning-to-move-2007.06123"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-learning-approach-for-low-latency-packet-loss-concealment-of-audio-signals-in-networked-music-performance-applications-2007.07132</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-learning-approach-for-low-latency-packet-loss-concealment-of-audio-signals-in-networked-music-performance-applications-2007.07132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-learning-approach-for-low-latency-packet-loss-concealment-of-audio-signals-in-networked-music-performance-applications-2007.07132"/></url>
<url><loc>https://scifaro.com/en/abs/learning-frame-level-attention-for-environmental-sound-classification-2007.07241</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-frame-level-attention-for-environmental-sound-classification-2007.07241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-frame-level-attention-for-environmental-sound-classification-2007.07241"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-xl-based-music-generation-with-multiple-sequences-of-time-valued-notes-2007.07244</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-xl-based-music-generation-with-multiple-sequences-of-time-valued-notes-2007.07244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-xl-based-music-generation-with-multiple-sequences-of-time-valued-notes-2007.07244"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-cue-points-for-dj-mixing-2007.08411</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-cue-points-for-dj-mixing-2007.08411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-cue-points-for-dj-mixing-2007.08411"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-of-context-aware-pitch-prosody-representations-2007.09060</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-of-context-aware-pitch-prosody-representations-2007.09060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-of-context-aware-pitch-prosody-representations-2007.09060"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-speaker-tracking-with-embeddings-2007.10248</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-speaker-tracking-with-embeddings-2007.10248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-speaker-tracking-with-embeddings-2007.10248"/></url>
<url><loc>https://scifaro.com/en/abs/wav2shape-hearing-the-shape-of-a-drum-machine-2007.10299</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2shape-hearing-the-shape-of-a-drum-machine-2007.10299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2shape-hearing-the-shape-of-a-drum-machine-2007.10299"/></url>
<url><loc>https://scifaro.com/en/abs/guided-multi-branch-learning-systems-for-sound-event-detection-with-sound-separation-2007.10638</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-multi-branch-learning-systems-for-sound-event-detection-with-sound-separation-2007.10638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-multi-branch-learning-systems-for-sound-event-detection-with-sound-separation-2007.10638"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-scattering-accurately-models-auditory-similarities-between-instrumental-playing-techniques-2007.10926</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-scattering-accurately-models-auditory-similarities-between-instrumental-playing-techniques-2007.10926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-scattering-accurately-models-auditory-similarities-between-instrumental-playing-techniques-2007.10926"/></url>
<url><loc>https://scifaro.com/en/abs/augmentation-adversarial-training-for-self-supervised-speaker-recognition-2007.12085</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmentation-adversarial-training-for-self-supervised-speaker-recognition-2007.12085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmentation-adversarial-training-for-self-supervised-speaker-recognition-2007.12085"/></url>
<url><loc>https://scifaro.com/en/abs/dd-cnn-depthwise-disout-convolutional-neural-network-for-low-complexity-acoustic-scene-classification-2007.12864</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dd-cnn-depthwise-disout-convolutional-neural-network-for-low-complexity-acoustic-scene-classification-2007.12864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dd-cnn-depthwise-disout-convolutional-neural-network-for-low-complexity-acoustic-scene-classification-2007.12864"/></url>
<url><loc>https://scifaro.com/en/abs/robust-front-end-for-multi-channel-asr-using-flow-based-density-estimation-2007.12903</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-front-end-for-multi-channel-asr-using-flow-based-density-estimation-2007.12903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-front-end-for-multi-channel-asr-using-flow-based-density-estimation-2007.12903"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-neural-audio-visual-sound-source-localization-via-probabilistic-spatial-modeling-2007.13976</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-neural-audio-visual-sound-source-localization-via-probabilistic-spatial-modeling-2007.13976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-neural-audio-visual-sound-source-localization-via-probabilistic-spatial-modeling-2007.13976"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-independent-vector-extraction-of-dominant-target-speech-2008.00143</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-independent-vector-extraction-of-dominant-target-speech-2008.00143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-independent-vector-extraction-of-dominant-target-speech-2008.00143"/></url>
<url><loc>https://scifaro.com/en/abs/audiolime-listenable-explanations-using-source-separation-2008.00582</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiolime-listenable-explanations-using-source-separation-2008.00582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiolime-listenable-explanations-using-source-separation-2008.00582"/></url>
<url><loc>https://scifaro.com/en/abs/the-jazz-transformer-on-the-front-line-exploring-the-shortcomings-of-ai-composed-music-through-quantitative-measures-2008.01307</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-jazz-transformer-on-the-front-line-exploring-the-shortcomings-of-ai-composed-music-through-quantitative-measures-2008.01307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-jazz-transformer-on-the-front-line-exploring-the-shortcomings-of-ai-composed-music-through-quantitative-measures-2008.01307"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-latent-space-exploration-and-creative-aspects-2008.01370</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-latent-space-exploration-and-creative-aspects-2008.01370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-latent-space-exploration-and-creative-aspects-2008.01370"/></url>
<url><loc>https://scifaro.com/en/abs/neural-granular-sound-synthesis-2008.01393</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-granular-sound-synthesis-2008.01393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-granular-sound-synthesis-2008.01393"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-composition-of-guitar-tabs-by-transformers-and-groove-modeling-2008.01431</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-composition-of-guitar-tabs-by-transformers-and-groove-modeling-2008.01431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-composition-of-guitar-tabs-by-transformers-and-groove-modeling-2008.01431"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-tts-training-with-frame-and-style-reconstruction-loss-2008.01490</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-tts-training-with-frame-and-style-reconstruction-loss-2008.01490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-tts-training-with-frame-and-style-reconstruction-loss-2008.01490"/></url>
<url><loc>https://scifaro.com/en/abs/muspy-a-toolkit-for-symbolic-music-generation-2008.01951</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muspy-a-toolkit-for-symbolic-music-generation-2008.01951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muspy-a-toolkit-for-symbolic-music-generation-2008.01951"/></url>
<url><loc>https://scifaro.com/en/abs/neural-loop-combiner-neural-network-models-for-assessing-the-compatibility-of-loops-2008.02011</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-loop-combiner-neural-network-models-for-assessing-the-compatibility-of-loops-2008.02011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-loop-combiner-neural-network-models-for-assessing-the-compatibility-of-loops-2008.02011"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-characterization-of-expressive-performance-in-classical-music-first-results-of-the-con-espressione-game-2008.02194</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-characterization-of-expressive-performance-in-classical-music-first-results-of-the-con-espressione-game-2008.02194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-characterization-of-expressive-performance-in-classical-music-first-results-of-the-con-espressione-game-2008.02194"/></url>
<url><loc>https://scifaro.com/en/abs/exact-parallelizable-dynamic-time-warping-alignment-with-linear-memory-2008.02734</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exact-parallelizable-dynamic-time-warping-alignment-with-linear-memory-2008.02734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exact-parallelizable-dynamic-time-warping-alignment-with-linear-memory-2008.02734"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-drum-transcription-in-polyphonic-music-2008.02791</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-drum-transcription-in-polyphonic-music-2008.02791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-drum-transcription-in-polyphonic-music-2008.02791"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-playing-techniques-generation-as-a-tagging-problem-2008.03436</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-playing-techniques-generation-as-a-tagging-problem-2008.03436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-playing-techniques-generation-as-a-tagging-problem-2008.03436"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-method-for-obtaining-diffuse-field-measurements-for-microphone-calibration-2008.03513</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-method-for-obtaining-diffuse-field-measurements-for-microphone-calibration-2008.03513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-method-for-obtaining-diffuse-field-measurements-for-microphone-calibration-2008.03513"/></url>
<url><loc>https://scifaro.com/en/abs/metric-learning-vs-classification-for-disentangled-music-representation-learning-2008.03729</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-learning-vs-classification-for-disentangled-music-representation-learning-2008.03729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-learning-vs-classification-for-disentangled-music-representation-learning-2008.03729"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-music-automated-music-composition-and-distribution-2008.04415</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-music-automated-music-composition-and-distribution-2008.04415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-music-automated-music-composition-and-distribution-2008.04415"/></url>
<url><loc>https://scifaro.com/en/abs/plugsonic-a-web-and-mobile-based-platform-for-binaural-audio-and-sonic-narratives-2008.04638</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plugsonic-a-web-and-mobile-based-platform-for-binaural-audio-and-sonic-narratives-2008.04638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plugsonic-a-web-and-mobile-based-platform-for-binaural-audio-and-sonic-narratives-2008.04638"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-intelligibility-in-text-to-speech-synthesis-using-speaking-style-conversion-2008.05809</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-intelligibility-in-text-to-speech-synthesis-using-speaking-style-conversion-2008.05809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-intelligibility-in-text-to-speech-synthesis-using-speaking-style-conversion-2008.05809"/></url>
<url><loc>https://scifaro.com/en/abs/mmm-exploring-conditional-multi-track-music-generation-with-the-transformer-2008.06048</loc><lastmod>2020-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmm-exploring-conditional-multi-track-music-generation-with-the-transformer-2008.06048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmm-exploring-conditional-multi-track-music-generation-with-the-transformer-2008.06048"/></url>
<url><loc>https://scifaro.com/en/abs/computer-generated-music-for-tabletop-role-playing-games-2008.07009</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computer-generated-music-for-tabletop-role-playing-games-2008.07009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computer-generated-music-for-tabletop-role-playing-games-2008.07009"/></url>
<url><loc>https://scifaro.com/en/abs/learning-interpretable-representation-for-controllable-polyphonic-music-generation-2008.07122</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-interpretable-representation-for-controllable-polyphonic-music-generation-2008.07122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-interpretable-representation-for-controllable-polyphonic-music-generation-2008.07122"/></url>
<url><loc>https://scifaro.com/en/abs/pop909-a-pop-song-dataset-for-music-arrangement-generation-2008.07142</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pop909-a-pop-song-dataset-for-music-arrangement-generation-2008.07142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pop909-a-pop-song-dataset-for-music-arrangement-generation-2008.07142"/></url>
<url><loc>https://scifaro.com/en/abs/popmag-pop-music-accompaniment-generation-2008.07703</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/popmag-pop-music-accompaniment-generation-2008.07703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/popmag-pop-music-accompaniment-generation-2008.07703"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-aedes-aegypti-mosquitoes-through-audio-classification-with-convolutional-neural-networks-2008.09024</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-aedes-aegypti-mosquitoes-through-audio-classification-with-convolutional-neural-networks-2008.09024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-aedes-aegypti-mosquitoes-through-audio-classification-with-convolutional-neural-networks-2008.09024"/></url>
<url><loc>https://scifaro.com/en/abs/a-efficient-multimodal-framework-for-large-scale-emotion-recognition-by-fusing-music-and-electrodermal-activity-signals-2008.09743</loc><lastmod>2022-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-efficient-multimodal-framework-for-large-scale-emotion-recognition-by-fusing-music-and-electrodermal-activity-signals-2008.09743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-efficient-multimodal-framework-for-large-scale-emotion-recognition-by-fusing-music-and-electrodermal-activity-signals-2008.09743"/></url>
<url><loc>https://scifaro.com/en/abs/translating-paintings-into-music-using-neural-networks-2008.09960</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/translating-paintings-into-music-using-neural-networks-2008.09960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/translating-paintings-into-music-using-neural-networks-2008.09960"/></url>
<url><loc>https://scifaro.com/en/abs/crnns-for-urban-sound-tagging-with-spatiotemporal-context-2008.10413</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crnns-for-urban-sound-tagging-with-spatiotemporal-context-2008.10413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crnns-for-urban-sound-tagging-with-spatiotemporal-context-2008.10413"/></url>
<url><loc>https://scifaro.com/en/abs/medley2k-a-dataset-of-medley-transitions-2008.11159</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/medley2k-a-dataset-of-medley-transitions-2008.11159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/medley2k-a-dataset-of-medley-transitions-2008.11159"/></url>
<url><loc>https://scifaro.com/en/abs/angus-real-time-manipulation-of-vocal-roughness-for-emotional-speech-transformations-2008.11241</loc><lastmod>2020-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/angus-real-time-manipulation-of-vocal-roughness-for-emotional-speech-transformations-2008.11241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/angus-real-time-manipulation-of-vocal-roughness-for-emotional-speech-transformations-2008.11241"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-british-accents-modelling-the-trap-bath-split-with-functional-data-analysis-2008.12233</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-british-accents-modelling-the-trap-bath-split-with-functional-data-analysis-2008.12233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-british-accents-modelling-the-trap-bath-split-with-functional-data-analysis-2008.12233"/></url>
<url><loc>https://scifaro.com/en/abs/non-local-musical-statistics-as-guides-for-audio-to-score-piano-transcription-2008.12710</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-local-musical-statistics-as-guides-for-audio-to-score-piano-transcription-2008.12710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-local-musical-statistics-as-guides-for-audio-to-score-piano-transcription-2008.12710"/></url>
<url><loc>https://scifaro.com/en/abs/towards-musically-meaningful-explanations-using-source-separation-2009.02051</loc><lastmod>2020-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-musically-meaningful-explanations-using-source-separation-2009.02051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-musically-meaningful-explanations-using-source-separation-2009.02051"/></url>
<url><loc>https://scifaro.com/en/abs/digital-envelope-estimation-via-geometric-properties-of-an-arbitrary-real-signal-2009.02860</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-envelope-estimation-via-geometric-properties-of-an-arbitrary-real-signal-2009.02860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-envelope-estimation-via-geometric-properties-of-an-arbitrary-real-signal-2009.02860"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-multi-modal-features-from-pre-trained-networks-for-alzheimer-s-dementia-recognition-2009.04070</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-multi-modal-features-from-pre-trained-networks-for-alzheimer-s-dementia-recognition-2009.04070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-multi-modal-features-from-pre-trained-networks-for-alzheimer-s-dementia-recognition-2009.04070"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-and-classification-model-for-malay-hindi-tamil-and-chinese-music-2009.04459</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-and-classification-model-for-malay-hindi-tamil-and-chinese-music-2009.04459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-and-classification-model-for-malay-hindi-tamil-and-chinese-music-2009.04459"/></url>
<url><loc>https://scifaro.com/en/abs/sonyc-ust-v2-an-urban-sound-tagging-dataset-with-spatiotemporal-context-2009.05188</loc><lastmod>2020-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonyc-ust-v2-an-urban-sound-tagging-dataset-with-spatiotemporal-context-2009.05188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonyc-ust-v2-an-urban-sound-tagging-dataset-with-spatiotemporal-context-2009.05188"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-vowel-nasalization-using-instantaneous-spectra-2009.06416</loc><lastmod>2020-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-vowel-nasalization-using-instantaneous-spectra-2009.06416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-vowel-nasalization-using-instantaneous-spectra-2009.06416"/></url>
<url><loc>https://scifaro.com/en/abs/cough-against-covid-evidence-of-covid-19-signature-in-cough-sounds-2009.08790</loc><lastmod>2020-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cough-against-covid-evidence-of-covid-19-signature-in-cough-sounds-2009.08790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cough-against-covid-evidence-of-covid-19-signature-in-cough-sounds-2009.08790"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-speech-emotion-recognition-using-manta-ray-based-feature-selection-2009.08909</loc><lastmod>2020-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-speech-emotion-recognition-using-manta-ray-based-feature-selection-2009.08909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-speech-emotion-recognition-using-manta-ray-based-feature-selection-2009.08909"/></url>
<url><loc>https://scifaro.com/en/abs/correlating-subword-articulation-with-lip-shapes-for-embedding-aware-audio-visual-speech-enhancement-2009.09561</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correlating-subword-articulation-with-lip-shapes-for-embedding-aware-audio-visual-speech-enhancement-2009.09561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correlating-subword-articulation-with-lip-shapes-for-embedding-aware-audio-visual-speech-enhancement-2009.09561"/></url>
<url><loc>https://scifaro.com/en/abs/the-coughvid-crowdsourcing-dataset-a-corpus-for-the-study-of-large-scale-cough-analysis-algorithms-2009.11644</loc><lastmod>2021-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-coughvid-crowdsourcing-dataset-a-corpus-for-the-study-of-large-scale-cough-analysis-algorithms-2009.11644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-coughvid-crowdsourcing-dataset-a-corpus-for-the-study-of-large-scale-cough-analysis-algorithms-2009.11644"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-space-representation-of-a-subtractive-synthesizer-2009.11706</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-space-representation-of-a-subtractive-synthesizer-2009.11706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-space-representation-of-a-subtractive-synthesizer-2009.11706"/></url>
<url><loc>https://scifaro.com/en/abs/bespoke-neural-networks-for-score-informed-source-separation-2009.13729</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bespoke-neural-networks-for-score-informed-source-separation-2009.13729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bespoke-neural-networks-for-score-informed-source-separation-2009.13729"/></url>
<url><loc>https://scifaro.com/en/abs/residual-acoustic-echo-suppression-based-on-efficient-multi-task-convolutional-neural-network-2009.13931</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-acoustic-echo-suppression-based-on-efficient-multi-task-convolutional-neural-network-2009.13931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-acoustic-echo-suppression-based-on-efficient-multi-task-convolutional-neural-network-2009.13931"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-evaluation-methodology-for-audio-to-score-alignment-2009.14374</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-evaluation-methodology-for-audio-to-score-alignment-2009.14374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-evaluation-methodology-for-audio-to-score-alignment-2009.14374"/></url>
<url><loc>https://scifaro.com/en/abs/the-midi-degradation-toolkit-symbolic-music-augmentation-and-correction-2010.00059</loc><lastmod>2020-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-midi-degradation-toolkit-symbolic-music-augmentation-and-correction-2010.00059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-midi-degradation-toolkit-symbolic-music-augmentation-and-correction-2010.00059"/></url>
<url><loc>https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-and-application-to-audio-signal-recovery-2010.00392</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-and-application-to-audio-signal-recovery-2010.00392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-and-application-to-audio-signal-recovery-2010.00392"/></url>
<url><loc>https://scifaro.com/en/abs/fsd50k-an-open-dataset-of-human-labeled-sound-events-2010.00475</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fsd50k-an-open-dataset-of-human-labeled-sound-events-2010.00475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fsd50k-an-open-dataset-of-human-labeled-sound-events-2010.00475"/></url>
<url><loc>https://scifaro.com/en/abs/deep-composer-classification-using-symbolic-representation-2010.00823</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-composer-classification-using-symbolic-representation-2010.00823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-composer-classification-using-symbolic-representation-2010.00823"/></url>
<url><loc>https://scifaro.com/en/abs/resonant-processing-of-instrumental-sound-controlled-by-spatial-position-2010.01572</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resonant-processing-of-instrumental-sound-controlled-by-spatial-position-2010.01572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resonant-processing-of-instrumental-sound-controlled-by-spatial-position-2010.01572"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-utterance-wise-and-continuous-speech-separation-2010.01703</loc><lastmod>2021-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-utterance-wise-and-continuous-speech-separation-2010.01703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-utterance-wise-and-continuous-speech-separation-2010.01703"/></url>
<url><loc>https://scifaro.com/en/abs/high-resolution-piano-transcription-with-pedals-by-regressing-onset-and-offset-times-2010.01815</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-resolution-piano-transcription-with-pedals-by-regressing-onset-and-offset-times-2010.01815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-resolution-piano-transcription-with-pedals-by-regressing-onset-and-offset-times-2010.01815"/></url>
<url><loc>https://scifaro.com/en/abs/voicegrad-non-parallel-any-to-many-voice-conversion-with-annealed-langevin-dynamics-2010.02977</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicegrad-non-parallel-any-to-many-voice-conversion-with-annealed-langevin-dynamics-2010.02977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicegrad-non-parallel-any-to-many-voice-conversion-with-annealed-langevin-dynamics-2010.02977"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-efficiency-of-spectral-features-extraction-by-structuring-the-audio-files-2010.03136</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-efficiency-of-spectral-features-extraction-by-structuring-the-audio-files-2010.03136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-efficiency-of-spectral-features-extraction-by-structuring-the-audio-files-2010.03136"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-on-audio-source-separation-2010.03164</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-on-audio-source-separation-2010.03164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-on-audio-source-separation-2010.03164"/></url>
<url><loc>https://scifaro.com/en/abs/generative-melody-composition-with-human-in-the-loop-bayesian-optimization-2010.03190</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-melody-composition-with-human-in-the-loop-bayesian-optimization-2010.03190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-melody-composition-with-human-in-the-loop-bayesian-optimization-2010.03190"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-transducer-one-model-unifying-streaming-and-non-streaming-speech-recognition-2010.03192</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-transducer-one-model-unifying-streaming-and-non-streaming-speech-recognition-2010.03192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-transducer-one-model-unifying-streaming-and-non-streaming-speech-recognition-2010.03192"/></url>
<url><loc>https://scifaro.com/en/abs/less-is-more-faster-and-better-music-version-identification-with-embedding-distillation-2010.03284</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/less-is-more-faster-and-better-music-version-identification-with-embedding-distillation-2010.03284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/less-is-more-faster-and-better-music-version-identification-with-embedding-distillation-2010.03284"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adversarial-neural-networks-for-dysarthric-speech-recognition-2010.03623</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adversarial-neural-networks-for-dysarthric-speech-recognition-2010.03623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adversarial-neural-networks-for-dysarthric-speech-recognition-2010.03623"/></url>
<url><loc>https://scifaro.com/en/abs/tatum-level-drum-transcription-based-on-a-convolutional-recurrent-neural-network-with-language-model-based-regularized-training-2010.03749</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tatum-level-drum-transcription-based-on-a-convolutional-recurrent-neural-network-with-language-model-based-regularized-training-2010.03749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tatum-level-drum-transcription-based-on-a-convolutional-recurrent-neural-network-with-language-model-based-regularized-training-2010.03749"/></url>
<url><loc>https://scifaro.com/en/abs/vrengt-a-shared-body-machine-instrument-for-music-dance-performance-2010.03779</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vrengt-a-shared-body-machine-instrument-for-music-dance-performance-2010.03779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vrengt-a-shared-body-machine-instrument-for-music-dance-performance-2010.03779"/></url>
<url><loc>https://scifaro.com/en/abs/texture-based-presentation-attack-detection-for-automatic-speaker-verification-2010.04038</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/texture-based-presentation-attack-detection-for-automatic-speaker-verification-2010.04038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/texture-based-presentation-attack-detection-for-automatic-speaker-verification-2010.04038"/></url>
<url><loc>https://scifaro.com/en/abs/non-attentive-tacotron-robust-and-controllable-neural-tts-synthesis-including-unsupervised-duration-modeling-2010.04301</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-attentive-tacotron-robust-and-controllable-neural-tts-synthesis-including-unsupervised-duration-modeling-2010.04301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-attentive-tacotron-robust-and-controllable-neural-tts-synthesis-including-unsupervised-duration-modeling-2010.04301"/></url>
<url><loc>https://scifaro.com/en/abs/baseline-system-of-voice-conversion-challenge-2020-with-cyclic-variational-autoencoder-and-parallel-wavegan-2010.04429</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/baseline-system-of-voice-conversion-challenge-2020-with-cyclic-variational-autoencoder-and-parallel-wavegan-2010.04429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/baseline-system-of-voice-conversion-challenge-2020-with-cyclic-variational-autoencoder-and-parallel-wavegan-2010.04429"/></url>
<url><loc>https://scifaro.com/en/abs/dataset-augmentation-and-dimensionality-reduction-of-pinna-related-transfer-functions-2010.04546</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dataset-augmentation-and-dimensionality-reduction-of-pinna-related-transfer-functions-2010.04546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dataset-augmentation-and-dimensionality-reduction-of-pinna-related-transfer-functions-2010.04546"/></url>
<url><loc>https://scifaro.com/en/abs/learning-acoustic-scattering-fields-for-dynamic-interactive-sound-propagation-2010.04865</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-acoustic-scattering-fields-for-dynamic-interactive-sound-propagation-2010.04865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-acoustic-scattering-fields-for-dynamic-interactive-sound-propagation-2010.04865"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-compression-method-with-matrix-product-operators-for-speech-enhancement-2010.04950</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-compression-method-with-matrix-product-operators-for-speech-enhancement-2010.04950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-compression-method-with-matrix-product-operators-for-speech-enhancement-2010.04950"/></url>
<url><loc>https://scifaro.com/en/abs/ai-song-contest-human-ai-co-creation-in-songwriting-2010.05388</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-song-contest-human-ai-co-creation-in-songwriting-2010.05388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-song-contest-human-ai-co-creation-in-songwriting-2010.05388"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-speaker-recognition-system-using-timbre-properties-2010.05502</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-speaker-recognition-system-using-timbre-properties-2010.05502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-speaker-recognition-system-using-timbre-properties-2010.05502"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-gan-generative-adversarial-networks-for-efficient-and-high-fidelity-speech-synthesis-2010.05646</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-gan-generative-adversarial-networks-for-efficient-and-high-fidelity-speech-synthesis-2010.05646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-gan-generative-adversarial-networks-for-efficient-and-high-fidelity-speech-synthesis-2010.05646"/></url>
<url><loc>https://scifaro.com/en/abs/conditioning-trick-for-training-stable-gans-2010.05844</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioning-trick-for-training-stable-gans-2010.05844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioning-trick-for-training-stable-gans-2010.05844"/></url>
<url><loc>https://scifaro.com/en/abs/the-cone-of-silence-speech-separation-by-localization-2010.06007</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-cone-of-silence-speech-separation-by-localization-2010.06007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-cone-of-silence-speech-separation-by-localization-2010.06007"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-triplet-loss-based-emotion-embedding-system-for-speech-emotion-recognition-2010.06200</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-triplet-loss-based-emotion-embedding-system-for-speech-emotion-recognition-2010.06200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-triplet-loss-based-emotion-embedding-system-for-speech-emotion-recognition-2010.06200"/></url>
<url><loc>https://scifaro.com/en/abs/a-variational-autoencoder-for-music-generation-controlled-by-tonal-tension-2010.06230</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variational-autoencoder-for-music-generation-controlled-by-tonal-tension-2010.06230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variational-autoencoder-for-music-generation-controlled-by-tonal-tension-2010.06230"/></url>
<url><loc>https://scifaro.com/en/abs/principles-for-designing-computer-music-controllers-2010.06524</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/principles-for-designing-computer-music-controllers-2010.06524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/principles-for-designing-computer-music-controllers-2010.06524"/></url>
<url><loc>https://scifaro.com/en/abs/towards-resistant-audio-adversarial-examples-2010.07190</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-resistant-audio-adversarial-examples-2010.07190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-resistant-audio-adversarial-examples-2010.07190"/></url>
<url><loc>https://scifaro.com/en/abs/emergent-jaw-predominance-in-vocal-development-through-stochastic-optimization-2010.07208</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emergent-jaw-predominance-in-vocal-development-through-stochastic-optimization-2010.07208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emergent-jaw-predominance-in-vocal-development-through-stochastic-optimization-2010.07208"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-analysis-and-influence-of-hierarchical-structure-on-melody-rhythm-and-harmony-in-popular-music-2010.07518</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-analysis-and-influence-of-hierarchical-structure-on-melody-rhythm-and-harmony-in-popular-music-2010.07518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-analysis-and-influence-of-hierarchical-structure-on-melody-rhythm-and-harmony-in-popular-music-2010.07518"/></url>
<url><loc>https://scifaro.com/en/abs/melody-classification-based-on-performance-event-vector-and-brnn-2010.07562</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-classification-based-on-performance-event-vector-and-brnn-2010.07562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-classification-based-on-performance-event-vector-and-brnn-2010.07562"/></url>
<url><loc>https://scifaro.com/en/abs/the-neteasegames-system-for-voice-conversion-challenge-2020-with-vector-quantization-variational-autoencoder-and-wavenet-2010.07630</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-neteasegames-system-for-voice-conversion-challenge-2020-with-vector-quantization-variational-autoencoder-and-wavenet-2010.07630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-neteasegames-system-for-voice-conversion-challenge-2020-with-vector-quantization-variational-autoencoder-and-wavenet-2010.07630"/></url>
<url><loc>https://scifaro.com/en/abs/music-classification-in-midi-format-based-on-lstm-mdel-2010.07739</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-classification-in-midi-format-based-on-lstm-mdel-2010.07739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-classification-in-midi-format-based-on-lstm-mdel-2010.07739"/></url>
<url><loc>https://scifaro.com/en/abs/a-transformer-based-pitch-sequence-autoencoder-with-midi-augmentation-2010.07758</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transformer-based-pitch-sequence-autoencoder-with-midi-augmentation-2010.07758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transformer-based-pitch-sequence-autoencoder-with-midi-augmentation-2010.07758"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-network-based-inverse-filtering-approach-for-speech-de-reverberation-2010.07895</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-based-inverse-filtering-approach-for-speech-de-reverberation-2010.07895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-based-inverse-filtering-approach-for-speech-de-reverberation-2010.07895"/></url>
<url><loc>https://scifaro.com/en/abs/pirhdy-learning-pitch-rhythm-and-dynamics-aware-embeddings-for-symbolic-music-2010.08091</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pirhdy-learning-pitch-rhythm-and-dynamics-aware-embeddings-for-symbolic-music-2010.08091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pirhdy-learning-pitch-rhythm-and-dynamics-aware-embeddings-for-symbolic-music-2010.08091"/></url>
<url><loc>https://scifaro.com/en/abs/melody-classifier-with-stacked-lstm-2010.08123</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-classifier-with-stacked-lstm-2010.08123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-classifier-with-stacked-lstm-2010.08123"/></url>
<url><loc>https://scifaro.com/en/abs/towards-natural-bilingual-and-code-switched-speech-synthesis-based-on-mix-of-monolingual-recordings-and-cross-lingual-voice-conversion-2010.08136</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-natural-bilingual-and-code-switched-speech-synthesis-based-on-mix-of-monolingual-recordings-and-cross-lingual-voice-conversion-2010.08136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-natural-bilingual-and-code-switched-speech-synthesis-based-on-mix-of-monolingual-recordings-and-cross-lingual-voice-conversion-2010.08136"/></url>
<url><loc>https://scifaro.com/en/abs/are-multiple-cross-correlation-identities-better-than-just-two-improving-the-estimate-of-time-differences-of-arrivals-from-blind-audio-signals-2010.08428</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-multiple-cross-correlation-identities-better-than-just-two-improving-the-estimate-of-time-differences-of-arrivals-from-blind-audio-signals-2010.08428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-multiple-cross-correlation-identities-better-than-just-two-improving-the-estimate-of-time-differences-of-arrivals-from-blind-audio-signals-2010.08428"/></url>
<url><loc>https://scifaro.com/en/abs/latent-vector-recovery-of-audio-gans-2010.08534</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-vector-recovery-of-audio-gans-2010.08534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-vector-recovery-of-audio-gans-2010.08534"/></url>
<url><loc>https://scifaro.com/en/abs/studying-the-similarity-of-covid-19-sounds-based-on-correlation-analysis-of-mfcc-2010.08770</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/studying-the-similarity-of-covid-19-sounds-based-on-correlation-analysis-of-mfcc-2010.08770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/studying-the-similarity-of-covid-19-sounds-based-on-correlation-analysis-of-mfcc-2010.08770"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-generative-adversarial-network-for-speech-enhancement-2010.09132</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-generative-adversarial-network-for-speech-enhancement-2010.09132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-generative-adversarial-network-for-speech-enhancement-2010.09132"/></url>
<url><loc>https://scifaro.com/en/abs/joint-analysis-of-sound-events-and-acoustic-scenes-using-multitask-learning-2010.09213</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-analysis-of-sound-events-and-acoustic-scenes-using-multitask-learning-2010.09213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-analysis-of-sound-events-and-acoustic-scenes-using-multitask-learning-2010.09213"/></url>
<url><loc>https://scifaro.com/en/abs/fast-accuracy-estimation-of-deep-learning-based-multi-class-musical-source-separation-2010.09453</loc><lastmod>2021-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-accuracy-estimation-of-deep-learning-based-multi-class-musical-source-separation-2010.09453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-accuracy-estimation-of-deep-learning-based-multi-class-musical-source-separation-2010.09453"/></url>
<url><loc>https://scifaro.com/en/abs/hit-song-prediction-based-on-early-adopter-data-and-audio-features-2010.09489</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hit-song-prediction-based-on-early-adopter-data-and-audio-features-2010.09489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hit-song-prediction-based-on-early-adopter-data-and-audio-features-2010.09489"/></url>
<url><loc>https://scifaro.com/en/abs/clar-contrastive-learning-of-auditory-representations-2010.09542</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clar-contrastive-learning-of-auditory-representations-2010.09542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clar-contrastive-learning-of-auditory-representations-2010.09542"/></url>
<url><loc>https://scifaro.com/en/abs/micaugment-one-shot-microphone-style-transfer-2010.09658</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/micaugment-one-shot-microphone-style-transfer-2010.09658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/micaugment-one-shot-microphone-style-transfer-2010.09658"/></url>
<url><loc>https://scifaro.com/en/abs/multi-window-data-augmentation-approach-for-speech-emotion-recognition-2010.09895</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-window-data-augmentation-approach-for-speech-emotion-recognition-2010.09895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-window-data-augmentation-approach-for-speech-emotion-recognition-2010.09895"/></url>
<url><loc>https://scifaro.com/en/abs/bird-big-impulse-response-dataset-2010.09930</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bird-big-impulse-response-dataset-2010.09930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bird-big-impulse-response-dataset-2010.09930"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-spectrogram-reconstruction-on-automatic-music-transcription-an-alternative-approach-to-improve-transcription-accuracy-2010.09969</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-spectrogram-reconstruction-on-automatic-music-transcription-an-alternative-approach-to-improve-transcription-accuracy-2010.09969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-spectrogram-reconstruction-on-automatic-music-transcription-an-alternative-approach-to-improve-transcription-accuracy-2010.09969"/></url>
<url><loc>https://scifaro.com/en/abs/power-pooling-an-adaptive-pooling-function-for-weakly-labelled-sound-event-detection-2010.09985</loc><lastmod>2021-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/power-pooling-an-adaptive-pooling-function-for-weakly-labelled-sound-event-detection-2010.09985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/power-pooling-an-adaptive-pooling-function-for-weakly-labelled-sound-event-detection-2010.09985"/></url>
<url><loc>https://scifaro.com/en/abs/tongji-university-undergraduate-team-for-the-voxceleb-speaker-recognition-challenge2020-2010.10145</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tongji-university-undergraduate-team-for-the-voxceleb-speaker-recognition-challenge2020-2010.10145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tongji-university-undergraduate-team-for-the-voxceleb-speaker-recognition-challenge2020-2010.10145"/></url>
<url><loc>https://scifaro.com/en/abs/phase-recovery-with-bregman-divergences-for-audio-source-separation-2010.10255</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-recovery-with-bregman-divergences-for-audio-source-separation-2010.10255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-recovery-with-bregman-divergences-for-audio-source-separation-2010.10255"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-cross-domain-losses-for-speech-enhancement-2010.10468</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-cross-domain-losses-for-speech-enhancement-2010.10468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-cross-domain-losses-for-speech-enhancement-2010.10468"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-separation-using-speaker-inventories-and-estimated-speech-2010.10556</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-separation-using-speaker-inventories-and-estimated-speech-2010.10556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-separation-using-speaker-inventories-and-estimated-speech-2010.10556"/></url>
<url><loc>https://scifaro.com/en/abs/venomave-targeted-poisoning-against-speech-recognition-2010.10682</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/venomave-targeted-poisoning-against-speech-recognition-2010.10682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/venomave-targeted-poisoning-against-speech-recognition-2010.10682"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-of-object-geometry-from-acoustic-scattering-using-convolutional-neural-networks-2010.10691</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-of-object-geometry-from-acoustic-scattering-using-convolutional-neural-networks-2010.10691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-of-object-geometry-from-acoustic-scattering-using-convolutional-neural-networks-2010.10691"/></url>
<url><loc>https://scifaro.com/en/abs/emformer-efficient-memory-transformer-based-acoustic-model-for-low-latency-streaming-speech-recognition-2010.10759</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emformer-efficient-memory-transformer-based-acoustic-model-for-low-latency-streaming-speech-recognition-2010.10759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emformer-efficient-memory-transformer-based-acoustic-model-for-low-latency-streaming-speech-recognition-2010.10759"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-of-general-purpose-audio-representations-2010.10915</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-of-general-purpose-audio-representations-2010.10915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-of-general-purpose-audio-representations-2010.10915"/></url>
<url><loc>https://scifaro.com/en/abs/wavetransformer-a-novel-architecture-for-audio-captioning-based-on-learning-temporal-and-time-frequency-information-2010.11098</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavetransformer-a-novel-architecture-for-audio-captioning-based-on-learning-temporal-and-time-frequency-information-2010.11098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavetransformer-a-novel-architecture-for-audio-captioning-based-on-learning-temporal-and-time-frequency-information-2010.11098"/></url>
<url><loc>https://scifaro.com/en/abs/joint-blind-room-acoustic-characterization-from-speech-and-music-signals-using-convolutional-recurrent-neural-networks-2010.11167</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-blind-room-acoustic-characterization-from-speech-and-music-signals-using-convolutional-recurrent-neural-networks-2010.11167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-blind-room-acoustic-characterization-from-speech-and-music-signals-using-convolutional-recurrent-neural-networks-2010.11167"/></url>
<url><loc>https://scifaro.com/en/abs/attendaffectnet-self-attention-based-networks-for-predicting-affective-responses-from-movies-2010.11188</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attendaffectnet-self-attention-based-networks-for-predicting-affective-responses-from-movies-2010.11188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attendaffectnet-self-attention-based-networks-for-predicting-affective-responses-from-movies-2010.11188"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-layer-customization-for-noise-robust-speech-emotion-recognition-in-heterogeneous-condition-training-2010.11226</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-layer-customization-for-noise-robust-speech-emotion-recognition-in-heterogeneous-condition-training-2010.11226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-layer-customization-for-noise-robust-speech-emotion-recognition-in-heterogeneous-condition-training-2010.11226"/></url>
<url><loc>https://scifaro.com/en/abs/the-idlab-voxsrc-20-submission-large-margin-fine-tuning-and-quality-aware-score-calibration-in-dnn-based-speaker-verification-2010.11255</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-idlab-voxsrc-20-submission-large-margin-fine-tuning-and-quality-aware-score-calibration-in-dnn-based-speaker-verification-2010.11255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-idlab-voxsrc-20-submission-large-margin-fine-tuning-and-quality-aware-score-calibration-in-dnn-based-speaker-verification-2010.11255"/></url>
<url><loc>https://scifaro.com/en/abs/class-conditional-defense-gan-against-end-to-end-speech-attacks-2010.11352</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/class-conditional-defense-gan-against-end-to-end-speech-attacks-2010.11352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/class-conditional-defense-gan-against-end-to-end-speech-attacks-2010.11352"/></url>
<url><loc>https://scifaro.com/en/abs/nu-gan-high-resolution-neural-upsampling-with-gan-2010.11362</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nu-gan-high-resolution-neural-upsampling-with-gan-2010.11362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nu-gan-high-resolution-neural-upsampling-with-gan-2010.11362"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-tacotron-non-autoregressive-and-controllable-tts-2010.11439</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-tacotron-non-autoregressive-and-controllable-tts-2010.11439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-tacotron-non-autoregressive-and-controllable-tts-2010.11439"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-generative-and-contrastive-learning-of-audio-representations-2010.11459</loc><lastmod>2021-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-generative-and-contrastive-learning-of-audio-representations-2010.11459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-generative-and-contrastive-learning-of-audio-representations-2010.11459"/></url>
<url><loc>https://scifaro.com/en/abs/mood-classification-using-listening-data-2010.11512</loc><lastmod>2020-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mood-classification-using-listening-data-2010.11512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mood-classification-using-listening-data-2010.11512"/></url>
<url><loc>https://scifaro.com/en/abs/aishell-3-a-multi-speaker-mandarin-tts-corpus-and-the-baselines-2010.11567</loc><lastmod>2021-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aishell-3-a-multi-speaker-mandarin-tts-corpus-and-the-baselines-2010.11567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aishell-3-a-multi-speaker-mandarin-tts-corpus-and-the-baselines-2010.11567"/></url>
<url><loc>https://scifaro.com/en/abs/lasaft-latent-source-attentive-frequency-transformation-for-conditioned-source-separation-2010.11631</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lasaft-latent-source-attentive-frequency-transformation-for-conditioned-source-separation-2010.11631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lasaft-latent-source-attentive-frequency-transformation-for-conditioned-source-separation-2010.11631"/></url>
<url><loc>https://scifaro.com/en/abs/towards-low-resource-stargan-voice-conversion-using-weight-adaptive-instance-normalization-2010.11646</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-low-resource-stargan-voice-conversion-using-weight-adaptive-instance-normalization-2010.11646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-low-resource-stargan-voice-conversion-using-weight-adaptive-instance-normalization-2010.11646"/></url>
<url><loc>https://scifaro.com/en/abs/the-huawei-speaker-diarisation-system-for-the-voxceleb-speaker-diarisation-challenge-2010.11657</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-huawei-speaker-diarisation-system-for-the-voxceleb-speaker-diarisation-challenge-2010.11657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-huawei-speaker-diarisation-system-for-the-voxceleb-speaker-diarisation-challenge-2010.11657"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-acoustic-vehicle-counting-2010.11659</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-acoustic-vehicle-counting-2010.11659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-acoustic-vehicle-counting-2010.11659"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-vc3-examining-and-improving-cyclegan-vcs-for-mel-spectrogram-conversion-2010.11672</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-vc3-examining-and-improving-cyclegan-vcs-for-mel-spectrogram-conversion-2010.11672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-vc3-examining-and-improving-cyclegan-vcs-for-mel-spectrogram-conversion-2010.11672"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-based-vehicle-counting-in-low-to-moderate-traffic-flow-2010.11716</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-based-vehicle-counting-in-low-to-moderate-traffic-flow-2010.11716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-based-vehicle-counting-in-low-to-moderate-traffic-flow-2010.11716"/></url>
<url><loc>https://scifaro.com/en/abs/compositional-embedding-models-for-speaker-identification-and-diarization-with-simultaneous-speech-from-2-speakers-2010.11803</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compositional-embedding-models-for-speaker-identification-and-diarization-with-simultaneous-speech-from-2-speakers-2010.11803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compositional-embedding-models-for-speaker-identification-and-diarization-with-simultaneous-speech-from-2-speakers-2010.11803"/></url>
<url><loc>https://scifaro.com/en/abs/urban-sound-classification-striving-towards-a-fair-comparison-2010.11805</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/urban-sound-classification-striving-towards-a-fair-comparison-2010.11805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/urban-sound-classification-striving-towards-a-fair-comparison-2010.11805"/></url>
<url><loc>https://scifaro.com/en/abs/towards-listening-to-10-people-simultaneously-an-efficient-permutation-invariant-training-of-audio-source-separation-using-sinkhorn-s-algorithm-2010.11871</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-listening-to-10-people-simultaneously-an-efficient-permutation-invariant-training-of-audio-source-separation-using-sinkhorn-s-algorithm-2010.11871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-listening-to-10-people-simultaneously-an-efficient-permutation-invariant-training-of-audio-source-separation-using-sinkhorn-s-algorithm-2010.11871"/></url>
<url><loc>https://scifaro.com/en/abs/transcription-is-all-you-need-learning-to-separate-musical-mixtures-with-score-as-supervision-2010.11904</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcription-is-all-you-need-learning-to-separate-musical-mixtures-with-score-as-supervision-2010.11904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcription-is-all-you-need-learning-to-separate-musical-mixtures-with-score-as-supervision-2010.11904"/></url>
<url><loc>https://scifaro.com/en/abs/neural-audio-fingerprint-for-high-specific-audio-retrieval-based-on-contrastive-learning-2010.11910</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-audio-fingerprint-for-high-specific-audio-retrieval-based-on-contrastive-learning-2010.11910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-audio-fingerprint-for-high-specific-audio-retrieval-based-on-contrastive-learning-2010.11910"/></url>
<url><loc>https://scifaro.com/en/abs/listening-to-sounds-of-silence-for-speech-denoising-2010.12013</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-to-sounds-of-silence-for-speech-denoising-2010.12013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-to-sounds-of-silence-for-speech-denoising-2010.12013"/></url>
<url><loc>https://scifaro.com/en/abs/combination-of-deep-speaker-embeddings-for-diarisation-2010.12025</loc><lastmod>2021-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combination-of-deep-speaker-embeddings-for-diarisation-2010.12025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combination-of-deep-speaker-embeddings-for-diarisation-2010.12025"/></url>
<url><loc>https://scifaro.com/en/abs/improving-streaming-automatic-speech-recognition-with-non-streaming-model-distillation-on-unsupervised-data-2010.12096</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-streaming-automatic-speech-recognition-with-non-streaming-model-distillation-on-unsupervised-data-2010.12096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-streaming-automatic-speech-recognition-with-non-streaming-model-distillation-on-unsupervised-data-2010.12096"/></url>
<url><loc>https://scifaro.com/en/abs/gsep-a-robust-vocal-and-accompaniment-separation-system-using-gated-cbhg-module-and-loudness-normalization-2010.12139</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gsep-a-robust-vocal-and-accompaniment-separation-system-using-gated-cbhg-module-and-loudness-normalization-2010.12139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gsep-a-robust-vocal-and-accompaniment-separation-system-using-gated-cbhg-module-and-loudness-normalization-2010.12139"/></url>
<url><loc>https://scifaro.com/en/abs/enriching-under-represented-named-entities-to-improve-speech-recognition-performance-2010.12143</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enriching-under-represented-named-entities-to-improve-speech-recognition-performance-2010.12143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enriching-under-represented-named-entities-to-improve-speech-recognition-performance-2010.12143"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-local-dense-synthesizer-attention-2010.12155</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-local-dense-synthesizer-attention-2010.12155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-local-dense-synthesizer-attention-2010.12155"/></url>
<url><loc>https://scifaro.com/en/abs/don-t-shoot-butterfly-with-rifles-multi-channel-continuous-speech-separation-with-early-exit-transformer-2010.12180</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/don-t-shoot-butterfly-with-rifles-multi-channel-continuous-speech-separation-with-early-exit-transformer-2010.12180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/don-t-shoot-butterfly-with-rifles-multi-channel-continuous-speech-separation-with-early-exit-transformer-2010.12180"/></url>
<url><loc>https://scifaro.com/en/abs/speech-activity-detection-based-on-multilingual-speech-recognition-system-2010.12277</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-activity-detection-based-on-multilingual-speech-recognition-system-2010.12277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-activity-detection-based-on-multilingual-speech-recognition-system-2010.12277"/></url>
<url><loc>https://scifaro.com/en/abs/a-computational-evaluation-of-musical-pattern-discovery-algorithms-2010.12325</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computational-evaluation-of-musical-pattern-discovery-algorithms-2010.12325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computational-evaluation-of-musical-pattern-discovery-algorithms-2010.12325"/></url>
<url><loc>https://scifaro.com/en/abs/eml-system-description-for-voxceleb-speaker-diarization-challenge-2020-2010.12497</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eml-system-description-for-voxceleb-speaker-diarization-challenge-2020-2010.12497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eml-system-description-for-voxceleb-speaker-diarization-challenge-2020-2010.12497"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-transfer-learning-in-music-source-separation-2010.12650</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-transfer-learning-in-music-source-separation-2010.12650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-transfer-learning-in-music-source-separation-2010.12650"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-self-attention-rnn-for-real-time-speech-enhancement-2010.12713</loc><lastmod>2021-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-self-attention-rnn-for-real-time-speech-enhancement-2010.12713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-self-attention-rnn-for-real-time-speech-enhancement-2010.12713"/></url>
<url><loc>https://scifaro.com/en/abs/learning-fine-grained-cross-modality-excitement-for-speech-emotion-recognition-2010.12733</loc><lastmod>2021-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-fine-grained-cross-modality-excitement-for-speech-emotion-recognition-2010.12733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-fine-grained-cross-modality-excitement-for-speech-emotion-recognition-2010.12733"/></url>
<url><loc>https://scifaro.com/en/abs/gazev-gan-based-zero-shot-voice-conversion-over-non-parallel-speech-corpus-2010.12788</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gazev-gan-based-zero-shot-voice-conversion-over-non-parallel-speech-corpus-2010.12788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gazev-gan-based-zero-shot-voice-conversion-over-non-parallel-speech-corpus-2010.12788"/></url>
<url><loc>https://scifaro.com/en/abs/stop-bugging-me-evading-modern-day-wiretapping-using-adversarial-perturbations-2010.12809</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stop-bugging-me-evading-modern-day-wiretapping-using-adversarial-perturbations-2010.12809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stop-bugging-me-evading-modern-day-wiretapping-using-adversarial-perturbations-2010.12809"/></url>
<url><loc>https://scifaro.com/en/abs/speakerfilter-pro-an-improved-target-speaker-extractor-combines-the-time-domain-and-frequency-domain-2010.13053</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speakerfilter-pro-an-improved-target-speaker-extractor-combines-the-time-domain-and-frequency-domain-2010.13053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speakerfilter-pro-an-improved-target-speaker-extractor-combines-the-time-domain-and-frequency-domain-2010.13053"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.13092</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.13092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.13092"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-all-pass-filters-with-randomized-center-frequencies-and-phase-polarity-for-acoustic-and-speech-measurement-and-data-augmentation-2010.13185</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-all-pass-filters-with-randomized-center-frequencies-and-phase-polarity-for-acoustic-and-speech-measurement-and-data-augmentation-2010.13185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-all-pass-filters-with-randomized-center-frequencies-and-phase-polarity-for-acoustic-and-speech-measurement-and-data-augmentation-2010.13185"/></url>
<url><loc>https://scifaro.com/en/abs/ir-gan-room-impulse-response-generator-for-far-field-speech-recognition-2010.13219</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ir-gan-room-impulse-response-generator-for-far-field-speech-recognition-2010.13219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ir-gan-room-impulse-response-generator-for-far-field-speech-recognition-2010.13219"/></url>
<url><loc>https://scifaro.com/en/abs/unified-gradient-reweighting-for-model-biasing-with-applications-to-source-separation-2010.13228</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-gradient-reweighting-for-model-biasing-with-applications-to-source-separation-2010.13228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-gradient-reweighting-for-model-biasing-with-applications-to-source-separation-2010.13228"/></url>
<url><loc>https://scifaro.com/en/abs/decentralizing-feature-extraction-with-quantum-convolutional-neural-network-for-automatic-speech-recognition-2010.13309</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decentralizing-feature-extraction-with-quantum-convolutional-neural-network-for-automatic-speech-recognition-2010.13309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decentralizing-feature-extraction-with-quantum-convolutional-neural-network-for-automatic-speech-recognition-2010.13309"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymization-with-distribution-preserving-x-vector-generation-for-the-voiceprivacy-challenge-2020-2010.13457</loc><lastmod>2021-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymization-with-distribution-preserving-x-vector-generation-for-the-voiceprivacy-challenge-2020-2010.13457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymization-with-distribution-preserving-x-vector-generation-for-the-voiceprivacy-challenge-2020-2010.13457"/></url>
<url><loc>https://scifaro.com/en/abs/melody-harmonization-using-orderless-nade-chord-balancing-and-blocked-gibbs-sampling-2010.13468</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-harmonization-using-orderless-nade-chord-balancing-and-blocked-gibbs-sampling-2010.13468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-harmonization-using-orderless-nade-chord-balancing-and-blocked-gibbs-sampling-2010.13468"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-audio-fingerprinting-2010.13540</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-audio-fingerprinting-2010.13540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-audio-fingerprinting-2010.13540"/></url>
<url><loc>https://scifaro.com/en/abs/the-frequency-spectrum-and-geometry-of-the-hal-saflieni-hypogeum-appear-tuned-2010.13697</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-frequency-spectrum-and-geometry-of-the-hal-saflieni-hypogeum-appear-tuned-2010.13697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-frequency-spectrum-and-geometry-of-the-hal-saflieni-hypogeum-appear-tuned-2010.13697"/></url>
<url><loc>https://scifaro.com/en/abs/bytecover-cover-song-identification-via-multi-loss-training-2010.14022</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bytecover-cover-song-identification-via-multi-loss-training-2010.14022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bytecover-cover-song-identification-via-multi-loss-training-2010.14022"/></url>
<url><loc>https://scifaro.com/en/abs/universal-asr-unifying-streaming-and-non-streaming-asr-using-a-single-encoder-decoder-model-2010.14099</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-asr-unifying-streaming-and-non-streaming-asr-using-a-single-encoder-decoder-model-2010.14099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-asr-unifying-streaming-and-non-streaming-asr-using-a-single-encoder-decoder-model-2010.14099"/></url>
<url><loc>https://scifaro.com/en/abs/rule-embedded-network-for-audio-visual-voice-activity-detection-in-live-musical-video-streams-2010.14168</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rule-embedded-network-for-audio-visual-voice-activity-detection-in-live-musical-video-streams-2010.14168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rule-embedded-network-for-audio-visual-voice-activity-detection-in-live-musical-video-streams-2010.14168"/></url>
<url><loc>https://scifaro.com/en/abs/learning-contextual-tag-embeddings-for-cross-modal-alignment-of-audio-and-tags-2010.14171</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-contextual-tag-embeddings-for-cross-modal-alignment-of-audio-and-tags-2010.14171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-contextual-tag-embeddings-for-cross-modal-alignment-of-audio-and-tags-2010.14171"/></url>
<url><loc>https://scifaro.com/en/abs/deep-generative-factorization-for-speech-signal-2010.14242</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-generative-factorization-for-speech-signal-2010.14242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-generative-factorization-for-speech-signal-2010.14242"/></url>
<url><loc>https://scifaro.com/en/abs/squeezing-value-of-cross-domain-labels-a-decoupled-scoring-approach-for-speaker-verification-2010.14243</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/squeezing-value-of-cross-domain-labels-a-decoupled-scoring-approach-for-speaker-verification-2010.14243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/squeezing-value-of-cross-domain-labels-a-decoupled-scoring-approach-for-speaker-verification-2010.14243"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-speaker-attribute-information-using-multi-task-learning-for-speaker-verification-and-diarization-2010.14269</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-speaker-attribute-information-using-multi-task-learning-for-speaker-verification-and-diarization-2010.14269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-speaker-attribute-information-using-multi-task-learning-for-speaker-verification-and-diarization-2010.14269"/></url>
<url><loc>https://scifaro.com/en/abs/upsampling-artifacts-in-neural-audio-synthesis-2010.14356</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/upsampling-artifacts-in-neural-audio-synthesis-2010.14356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/upsampling-artifacts-in-neural-audio-synthesis-2010.14356"/></url>
<url><loc>https://scifaro.com/en/abs/remixing-music-with-visual-conditioning-2010.14565</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remixing-music-with-visual-conditioning-2010.14565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remixing-music-with-visual-conditioning-2010.14565"/></url>
<url><loc>https://scifaro.com/en/abs/copypaste-an-augmentation-method-for-speech-emotion-recognition-2010.14602</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/copypaste-an-augmentation-method-for-speech-emotion-recognition-2010.14602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/copypaste-an-augmentation-method-for-speech-emotion-recognition-2010.14602"/></url>
<url><loc>https://scifaro.com/en/abs/melody-conditioned-lyrics-generation-with-seqgans-2010.14709</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-conditioned-lyrics-generation-with-seqgans-2010.14709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-conditioned-lyrics-generation-with-seqgans-2010.14709"/></url>
<url><loc>https://scifaro.com/en/abs/seen-and-unseen-emotional-style-transfer-for-voice-conversion-with-a-new-emotional-speech-dataset-2010.14794</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seen-and-unseen-emotional-style-transfer-for-voice-conversion-with-a-new-emotional-speech-dataset-2010.14794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seen-and-unseen-emotional-style-transfer-for-voice-conversion-with-a-new-emotional-speech-dataset-2010.14794"/></url>
<url><loc>https://scifaro.com/en/abs/decoupling-pronunciation-and-language-for-end-to-end-code-switching-automatic-speech-recognition-2010.14798</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupling-pronunciation-and-language-for-end-to-end-code-switching-automatic-speech-recognition-2010.14798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupling-pronunciation-and-language-for-end-to-end-code-switching-automatic-speech-recognition-2010.14798"/></url>
<url><loc>https://scifaro.com/en/abs/ppg-based-singing-voice-conversion-with-adversarial-representation-learning-2010.14804</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ppg-based-singing-voice-conversion-with-adversarial-representation-learning-2010.14804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ppg-based-singing-voice-conversion-with-adversarial-representation-learning-2010.14804"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-midi-based-composer-classification-2010.14805</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-midi-based-composer-classification-2010.14805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-midi-based-composer-classification-2010.14805"/></url>
<url><loc>https://scifaro.com/en/abs/int8-winograd-acceleration-for-conv1d-equipped-asr-models-deployed-on-mobile-devices-2010.14841</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/int8-winograd-acceleration-for-conv1d-equipped-asr-models-deployed-on-mobile-devices-2010.14841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/int8-winograd-acceleration-for-conv1d-equipped-asr-models-deployed-on-mobile-devices-2010.14841"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-transformer-asr-with-ctc-enhanced-decoder-input-2010.15025</loc><lastmod>2021-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-transformer-asr-with-ctc-enhanced-decoder-input-2010.15025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-transformer-asr-with-ctc-enhanced-decoder-input-2010.15025"/></url>
<url><loc>https://scifaro.com/en/abs/gender-bias-in-depression-detection-using-audio-features-2010.15120</loc><lastmod>2021-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-bias-in-depression-detection-using-audio-features-2010.15120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-bias-in-depression-detection-using-audio-features-2010.15120"/></url>
<url><loc>https://scifaro.com/en/abs/improving-perceptual-quality-by-phone-fortified-perceptual-loss-using-wasserstein-distance-for-speech-enhancement-2010.15174</loc><lastmod>2021-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-perceptual-quality-by-phone-fortified-perceptual-loss-using-wasserstein-distance-for-speech-enhancement-2010.15174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-perceptual-quality-by-phone-fortified-perceptual-loss-using-wasserstein-distance-for-speech-enhancement-2010.15174"/></url>
<url><loc>https://scifaro.com/en/abs/dnsmos-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2010.15258</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnsmos-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2010.15258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnsmos-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2010.15258"/></url>
<url><loc>https://scifaro.com/en/abs/the-iqiyi-system-for-voice-conversion-challenge-2020-2010.15317</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-iqiyi-system-for-voice-conversion-challenge-2020-2010.15317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-iqiyi-system-for-voice-conversion-challenge-2020-2010.15317"/></url>
<url><loc>https://scifaro.com/en/abs/stabilizing-label-assignment-for-speech-separation-by-self-supervised-pre-training-2010.15366</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stabilizing-label-assignment-for-speech-separation-by-self-supervised-pre-training-2010.15366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stabilizing-label-assignment-for-speech-separation-by-self-supervised-pre-training-2010.15366"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-embeddings-with-user-listening-data-for-content-based-music-recommendation-2010.15389</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-embeddings-with-user-listening-data-for-content-based-music-recommendation-2010.15389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-embeddings-with-user-listening-data-for-content-based-music-recommendation-2010.15389"/></url>
<url><loc>https://scifaro.com/en/abs/playing-a-part-speaker-verification-at-the-movies-2010.15716</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/playing-a-part-speaker-verification-at-the-movies-2010.15716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/playing-a-part-speaker-verification-at-the-movies-2010.15716"/></url>
<url><loc>https://scifaro.com/en/abs/gans-reels-creating-irish-music-using-a-generative-adversarial-network-2010.15772</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gans-reels-creating-irish-music-using-a-generative-adversarial-network-2010.15772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gans-reels-creating-irish-music-using-a-generative-adversarial-network-2010.15772"/></url>
<url><loc>https://scifaro.com/en/abs/the-ins-and-outs-of-speaker-recognition-lessons-from-voxsrc-2020-2010.15809</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ins-and-outs-of-speaker-recognition-lessons-from-voxsrc-2020-2010.15809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ins-and-outs-of-speaker-recognition-lessons-from-voxsrc-2020-2010.15809"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-correlates-of-the-voice-qualifiers-a-survey-2010.15869</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-correlates-of-the-voice-qualifiers-a-survey-2010.15869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-correlates-of-the-voice-qualifiers-a-survey-2010.15869"/></url>
<url><loc>https://scifaro.com/en/abs/latent-space-oddity-exploring-latent-spaces-to-design-guitar-timbres-2010.15989</loc><lastmod>2020-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-space-oddity-exploring-latent-spaces-to-design-guitar-timbres-2010.15989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-space-oddity-exploring-latent-spaces-to-design-guitar-timbres-2010.15989"/></url>
<url><loc>https://scifaro.com/en/abs/t-vectors-weakly-supervised-speaker-identification-using-hierarchical-transformer-model-2010.16071</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-vectors-weakly-supervised-speaker-identification-using-hierarchical-transformer-model-2010.16071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-vectors-weakly-supervised-speaker-identification-using-hierarchical-transformer-model-2010.16071"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speaker-vector-normalization-with-maximum-gaussianality-training-2010.16148</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speaker-vector-normalization-with-maximum-gaussianality-training-2010.16148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speaker-vector-normalization-with-maximum-gaussianality-training-2010.16148"/></url>
<url><loc>https://scifaro.com/en/abs/audvowelconsnet-a-phoneme-level-based-deep-cnn-architecture-for-clinical-depression-diagnosis-2010.16201</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audvowelconsnet-a-phoneme-level-based-deep-cnn-architecture-for-clinical-depression-diagnosis-2010.16201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audvowelconsnet-a-phoneme-level-based-deep-cnn-architecture-for-clinical-depression-diagnosis-2010.16201"/></url>
<url><loc>https://scifaro.com/en/abs/respirenet-a-deep-neural-network-for-accurately-detecting-abnormal-lung-sounds-in-limited-data-setting-2011.00196</loc><lastmod>2021-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/respirenet-a-deep-neural-network-for-accurately-detecting-abnormal-lung-sounds-in-limited-data-setting-2011.00196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/respirenet-a-deep-neural-network-for-accurately-detecting-abnormal-lung-sounds-in-limited-data-setting-2011.00196"/></url>
<url><loc>https://scifaro.com/en/abs/the-xx205-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.00200</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-xx205-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.00200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-xx205-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.00200"/></url>
<url><loc>https://scifaro.com/en/abs/learning-generic-feature-representation-with-synthetic-data-for-weakly-supervised-sound-event-detection-by-inter-frame-distance-loss-2011.00695</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-generic-feature-representation-with-synthetic-data-for-weakly-supervised-sound-event-detection-by-inter-frame-distance-loss-2011.00695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-generic-feature-representation-with-synthetic-data-for-weakly-supervised-sound-event-detection-by-inter-frame-distance-loss-2011.00695"/></url>
<url><loc>https://scifaro.com/en/abs/using-a-bi-directional-lstm-model-with-attention-mechanism-trained-on-midi-data-for-generating-unique-music-2011.00773</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-a-bi-directional-lstm-model-with-attention-mechanism-trained-on-midi-data-for-generating-unique-music-2011.00773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-a-bi-directional-lstm-model-with-attention-mechanism-trained-on-midi-data-for-generating-unique-music-2011.00773"/></url>
<url><loc>https://scifaro.com/en/abs/cvc-contrastive-learning-for-non-parallel-voice-conversion-2011.00782</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cvc-contrastive-learning-for-non-parallel-voice-conversion-2011.00782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cvc-contrastive-learning-for-non-parallel-voice-conversion-2011.00782"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-and-separation-a-benchmark-on-desed-synthetic-soundscapes-2011.00801</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-and-separation-a-benchmark-on-desed-synthetic-soundscapes-2011.00801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-and-separation-a-benchmark-on-desed-synthetic-soundscapes-2011.00801"/></url>
<url><loc>https://scifaro.com/en/abs/what-s-all-the-fuss-about-free-universal-sound-separation-data-2011.00803</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-s-all-the-fuss-about-free-universal-sound-separation-data-2011.00803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-s-all-the-fuss-about-free-universal-sound-separation-data-2011.00803"/></url>
<url><loc>https://scifaro.com/en/abs/into-the-wild-with-audioscope-unsupervised-audio-visual-separation-of-on-screen-sounds-2011.01143</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/into-the-wild-with-audioscope-unsupervised-audio-visual-separation-of-on-screen-sounds-2011.01143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/into-the-wild-with-audioscope-unsupervised-audio-visual-separation-of-on-screen-sounds-2011.01143"/></url>
<url><loc>https://scifaro.com/en/abs/optimize-what-matters-training-dnn-hmm-keyword-spotting-model-using-end-metric-2011.01151</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimize-what-matters-training-dnn-hmm-keyword-spotting-model-using-end-metric-2011.01151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimize-what-matters-training-dnn-hmm-keyword-spotting-model-using-end-metric-2011.01151"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-approach-to-device-robust-acoustic-scene-classification-2011.01447</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-approach-to-device-robust-acoustic-scene-classification-2011.01447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-approach-to-device-robust-acoustic-scene-classification-2011.01447"/></url>
<url><loc>https://scifaro.com/en/abs/shanerun-system-description-to-voxceleb-speaker-recognition-challenge-2020-2011.01518</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shanerun-system-description-to-voxceleb-speaker-recognition-challenge-2020-2011.01518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shanerun-system-description-to-voxceleb-speaker-recognition-challenge-2020-2011.01518"/></url>
<url><loc>https://scifaro.com/en/abs/two-heads-are-better-than-one-a-two-stage-approach-for-monaural-noise-reduction-in-the-complex-domain-2011.01561</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-heads-are-better-than-one-a-two-stage-approach-for-monaural-noise-reduction-in-the-complex-domain-2011.01561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-heads-are-better-than-one-a-two-stage-approach-for-monaural-noise-reduction-in-the-complex-domain-2011.01561"/></url>
<url><loc>https://scifaro.com/en/abs/shift-if-you-can-counting-and-visualising-correction-operations-for-beat-tracking-evaluation-2011.01637</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shift-if-you-can-counting-and-visualising-correction-operations-for-beat-tracking-evaluation-2011.01637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shift-if-you-can-counting-and-visualising-correction-operations-for-beat-tracking-evaluation-2011.01637"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-text-independent-speaker-verification-for-embedded-systems-2011.01709</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-text-independent-speaker-verification-for-embedded-systems-2011.01709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-text-independent-speaker-verification-for-embedded-systems-2011.01709"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-trust-deep-speech-prior-2011.02110</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-trust-deep-speech-prior-2011.02110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-trust-deep-speech-prior-2011.02110"/></url>
<url><loc>https://scifaro.com/en/abs/desnet-a-multi-channel-network-for-simultaneous-speech-dereverberation-enhancement-and-separation-2011.02131</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/desnet-a-multi-channel-network-for-simultaneous-speech-dereverberation-enhancement-and-separation-2011.02131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/desnet-a-multi-channel-network-for-simultaneous-speech-dereverberation-enhancement-and-separation-2011.02131"/></url>
<url><loc>https://scifaro.com/en/abs/ieee-slt-2021-alpha-mini-speech-challenge-open-datasets-tracks-rules-and-baselines-2011.02198</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ieee-slt-2021-alpha-mini-speech-challenge-open-datasets-tracks-rules-and-baselines-2011.02198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ieee-slt-2021-alpha-mini-speech-challenge-open-datasets-tracks-rules-and-baselines-2011.02198"/></url>
<url><loc>https://scifaro.com/en/abs/vaw-gan-for-disentanglement-and-recomposition-of-emotional-elements-in-speech-2011.02314</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vaw-gan-for-disentanglement-and-recomposition-of-emotional-elements-in-speech-2011.02314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vaw-gan-for-disentanglement-and-recomposition-of-emotional-elements-in-speech-2011.02314"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-voice-separation-for-unknown-number-of-speakers-under-reverberant-and-noisy-settings-2011.02329</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-voice-separation-for-unknown-number-of-speakers-under-reverberant-and-noisy-settings-2011.02329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-voice-separation-for-unknown-number-of-speakers-under-reverberant-and-noisy-settings-2011.02329"/></url>
<url><loc>https://scifaro.com/en/abs/bw-eda-eend-streaming-end-to-end-neural-speaker-diarization-for-a-variable-number-of-speakers-2011.02678</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bw-eda-eend-streaming-end-to-end-neural-speaker-diarization-for-a-variable-number-of-speakers-2011.02678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bw-eda-eend-streaming-end-to-end-neural-speaker-diarization-for-a-variable-number-of-speakers-2011.02678"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-for-singing-synthesis-timbre-2011.02809</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-for-singing-synthesis-timbre-2011.02809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-for-singing-synthesis-timbre-2011.02809"/></url>
<url><loc>https://scifaro.com/en/abs/influence-of-event-duration-on-automatic-wheeze-classification-2011.02874</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/influence-of-event-duration-on-automatic-wheeze-classification-2011.02874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/influence-of-event-duration-on-automatic-wheeze-classification-2011.02874"/></url>
<url><loc>https://scifaro.com/en/abs/query-expansion-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.02882</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/query-expansion-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.02882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/query-expansion-system-for-the-voxceleb-speaker-recognition-challenge-2020-2011.02882"/></url>
<url><loc>https://scifaro.com/en/abs/from-note-level-to-chord-level-neural-network-models-for-voice-separation-in-symbolic-music-2011.03028</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-note-level-to-chord-level-neural-network-models-for-voice-separation-in-symbolic-music-2011.03028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-note-level-to-chord-level-neural-network-models-for-voice-separation-in-symbolic-music-2011.03028"/></url>
<url><loc>https://scifaro.com/en/abs/robust-enf-estimation-based-on-harmonic-enhancement-and-maximum-weight-clique-2011.03414</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-enf-estimation-based-on-harmonic-enhancement-and-maximum-weight-clique-2011.03414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-enf-estimation-based-on-harmonic-enhancement-and-maximum-weight-clique-2011.03414"/></url>
<url><loc>https://scifaro.com/en/abs/non-local-convolutional-neural-networks-nlcnn-for-speaker-recognition-2011.03682</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-local-convolutional-neural-networks-nlcnn-for-speaker-recognition-2011.03682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-local-convolutional-neural-networks-nlcnn-for-speaker-recognition-2011.03682"/></url>
<url><loc>https://scifaro.com/en/abs/detection-and-evaluation-of-human-and-machine-generated-speech-in-spoofing-attacks-on-automatic-speaker-verification-systems-2011.03689</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-and-evaluation-of-human-and-machine-generated-speech-in-spoofing-attacks-on-automatic-speaker-verification-systems-2011.03689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-and-evaluation-of-human-and-machine-generated-speech-in-spoofing-attacks-on-automatic-speaker-verification-systems-2011.03689"/></url>
<url><loc>https://scifaro.com/en/abs/dual-application-of-speech-enhancement-for-automatic-speech-recognition-2011.03840</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-application-of-speech-enhancement-for-automatic-speech-recognition-2011.03840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-application-of-speech-enhancement-for-automatic-speech-recognition-2011.03840"/></url>
<url><loc>https://scifaro.com/en/abs/denoising-and-dereverberation-hierarchical-neural-vocoder-for-robust-waveform-generation-2011.03955</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoising-and-dereverberation-hierarchical-neural-vocoder-for-robust-waveform-generation-2011.03955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoising-and-dereverberation-hierarchical-neural-vocoder-for-robust-waveform-generation-2011.03955"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-gating-improved-convolutional-neural-networks-for-speech-enhancement-in-the-time-frequency-domain-2011.04092</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-gating-improved-convolutional-neural-networks-for-speech-enhancement-in-the-time-frequency-domain-2011.04092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-gating-improved-convolutional-neural-networks-for-speech-enhancement-in-the-time-frequency-domain-2011.04092"/></url>
<url><loc>https://scifaro.com/en/abs/gated-recurrent-fusion-with-joint-training-framework-for-robust-end-to-end-speech-recognition-2011.04249</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gated-recurrent-fusion-with-joint-training-framework-for-robust-end-to-end-speech-recognition-2011.04249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gated-recurrent-fusion-with-joint-training-framework-for-robust-end-to-end-speech-recognition-2011.04249"/></url>
<url><loc>https://scifaro.com/en/abs/stoi-net-a-deep-learning-based-non-intrusive-speech-intelligibility-assessment-model-2011.04292</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stoi-net-a-deep-learning-based-non-intrusive-speech-intelligibility-assessment-model-2011.04292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stoi-net-a-deep-learning-based-non-intrusive-speech-intelligibility-assessment-model-2011.04292"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-singing-voice-detection-2011.04297</loc><lastmod>2021-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-singing-voice-detection-2011.04297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-singing-voice-detection-2011.04297"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-patient-detection-from-telephone-quality-speech-data-2011.04299</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-patient-detection-from-telephone-quality-speech-data-2011.04299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-patient-detection-from-telephone-quality-speech-data-2011.04299"/></url>
<url><loc>https://scifaro.com/en/abs/masked-proxy-loss-for-text-independent-speaker-verification-2011.04491</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-proxy-loss-for-text-independent-speaker-verification-2011.04491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-proxy-loss-for-text-independent-speaker-verification-2011.04491"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-children-s-speech-recognition-the-ethiopian-system-for-the-slt-2021-children-speech-recognition-challenge-2011.04547</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-children-s-speech-recognition-the-ethiopian-system-for-the-slt-2021-children-speech-recognition-challenge-2011.04547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-children-s-speech-recognition-the-ethiopian-system-for-the-slt-2021-children-speech-recognition-challenge-2011.04547"/></url>
<url><loc>https://scifaro.com/en/abs/musical-analysis-of-stravinski-s-the-rite-of-spring-based-on-computational-methods-2011.04568</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-analysis-of-stravinski-s-the-rite-of-spring-based-on-computational-methods-2011.04568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-analysis-of-stravinski-s-the-rite-of-spring-based-on-computational-methods-2011.04568"/></url>
<url><loc>https://scifaro.com/en/abs/frill-a-non-semantic-speech-embedding-for-mobile-devices-2011.04609</loc><lastmod>2022-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frill-a-non-semantic-speech-embedding-for-mobile-devices-2011.04609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frill-a-non-semantic-speech-embedding-for-mobile-devices-2011.04609"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-de-identification-system-using-autoencoders-and-adversarial-training-2011.04696</loc><lastmod>2021-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-de-identification-system-using-autoencoders-and-adversarial-training-2011.04696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-de-identification-system-using-autoencoders-and-adversarial-training-2011.04696"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-strategies-waveform-model-choice-and-acoustic-configurations-for-multi-speaker-end-to-end-speech-synthesis-2011.04839</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-strategies-waveform-model-choice-and-acoustic-configurations-for-multi-speaker-end-to-end-speech-synthesis-2011.04839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-strategies-waveform-model-choice-and-acoustic-configurations-for-multi-speaker-end-to-end-speech-synthesis-2011.04839"/></url>
<url><loc>https://scifaro.com/en/abs/deconstruct-and-reconstruct-dizi-music-of-the-northern-school-and-the-southern-school-2011.04974</loc><lastmod>2020-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deconstruct-and-reconstruct-dizi-music-of-the-northern-school-and-the-southern-school-2011.04974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deconstruct-and-reconstruct-dizi-music-of-the-northern-school-and-the-southern-school-2011.04974"/></url>
<url><loc>https://scifaro.com/en/abs/ganterpretations-2011.05158</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ganterpretations-2011.05158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ganterpretations-2011.05158"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-attention-for-speaker-recognition-2011.05189</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-attention-for-speaker-recognition-2011.05189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-attention-for-speaker-recognition-2011.05189"/></url>
<url><loc>https://scifaro.com/en/abs/sound-synthesis-propagation-and-rendering-a-survey-2011.05538</loc><lastmod>2021-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-synthesis-propagation-and-rendering-a-survey-2011.05538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-synthesis-propagation-and-rendering-a-survey-2011.05538"/></url>
<url><loc>https://scifaro.com/en/abs/deep-time-delay-neural-network-for-speech-enhancement-with-full-data-learning-2011.05591</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-time-delay-neural-network-for-speech-enhancement-with-full-data-learning-2011.05591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-time-delay-neural-network-for-speech-enhancement-with-full-data-learning-2011.05591"/></url>
<url><loc>https://scifaro.com/en/abs/wadenet-wavelet-decomposition-based-cnn-for-speech-processing-2011.05594</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wadenet-wavelet-decomposition-based-cnn-for-speech-processing-2011.05594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wadenet-wavelet-decomposition-based-cnn-for-speech-processing-2011.05594"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-neural-lyrics-and-melody-composition-2011.06380</loc><lastmod>2020-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-neural-lyrics-and-melody-composition-2011.06380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-neural-lyrics-and-melody-composition-2011.06380"/></url>
<url><loc>https://scifaro.com/en/abs/using-ipa-based-tacotron-for-data-efficient-cross-lingual-speaker-adaptation-and-pronunciation-enhancement-2011.06392</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-ipa-based-tacotron-for-data-efficient-cross-lingual-speaker-adaptation-and-pronunciation-enhancement-2011.06392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-ipa-based-tacotron-for-data-efficient-cross-lingual-speaker-adaptation-and-pronunciation-enhancement-2011.06392"/></url>
<url><loc>https://scifaro.com/en/abs/the-slt-2021-children-speech-recognition-challenge-open-datasets-rules-and-baselines-2011.06724</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-slt-2021-children-speech-recognition-challenge-open-datasets-rules-and-baselines-2011.06724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-slt-2021-children-speech-recognition-challenge-open-datasets-rules-and-baselines-2011.06724"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-on-deep-music-generation-multi-level-representations-algorithms-evaluations-and-future-directions-2011.06801</loc><lastmod>2020-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-deep-music-generation-multi-level-representations-algorithms-evaluations-and-future-directions-2011.06801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-deep-music-generation-multi-level-representations-algorithms-evaluations-and-future-directions-2011.06801"/></url>
<url><loc>https://scifaro.com/en/abs/communication-cost-aware-microphone-selection-for-neural-speech-enhancement-with-ad-hoc-microphone-arrays-2011.07348</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/communication-cost-aware-microphone-selection-for-neural-speech-enhancement-with-ad-hoc-microphone-arrays-2011.07348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/communication-cost-aware-microphone-selection-for-neural-speech-enhancement-with-ad-hoc-microphone-arrays-2011.07348"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-enhancement-performance-by-leveraging-contextual-broad-phonetic-class-information-2011.07442</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-enhancement-performance-by-leveraging-contextual-broad-phonetic-class-information-2011.07442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-enhancement-performance-by-leveraging-contextual-broad-phonetic-class-information-2011.07442"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-and-perceptual-discrimination-between-dysarthria-apraxia-of-speech-and-neurotypical-speech-2011.07542</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-and-perceptual-discrimination-between-dysarthria-apraxia-of-speech-and-neurotypical-speech-2011.07542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-and-perceptual-discrimination-between-dysarthria-apraxia-of-speech-and-neurotypical-speech-2011.07542"/></url>
<url><loc>https://scifaro.com/en/abs/learning-frame-similarity-using-siamese-networks-for-audio-to-score-alignment-2011.07546</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-frame-similarity-using-siamese-networks-for-audio-to-score-alignment-2011.07546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-frame-similarity-using-siamese-networks-for-audio-to-score-alignment-2011.07546"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-contrastive-learning-of-sound-event-representations-2011.07616</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-contrastive-learning-of-sound-event-representations-2011.07616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-contrastive-learning-of-sound-event-representations-2011.07616"/></url>
<url><loc>https://scifaro.com/en/abs/learn2sing-target-speaker-singing-voice-synthesis-by-learning-from-a-singing-teacher-2011.08467</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn2sing-target-speaker-singing-voice-synthesis-by-learning-from-a-singing-teacher-2011.08467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn2sing-target-speaker-singing-voice-synthesis-by-learning-from-a-singing-teacher-2011.08467"/></url>
<url><loc>https://scifaro.com/en/abs/cascade-rnn-transducer-syllable-based-streaming-on-device-mandarin-speech-recognition-with-a-syllable-to-character-converter-2011.08469</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascade-rnn-transducer-syllable-based-streaming-on-device-mandarin-speech-recognition-with-a-syllable-to-character-converter-2011.08469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascade-rnn-transducer-syllable-based-streaming-on-device-mandarin-speech-recognition-with-a-syllable-to-character-converter-2011.08469"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-emotion-strength-transfer-control-and-prediction-for-emotional-speech-synthesis-2011.08477</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-emotion-strength-transfer-control-and-prediction-for-emotional-speech-synthesis-2011.08477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-emotion-strength-transfer-control-and-prediction-for-emotional-speech-synthesis-2011.08477"/></url>
<url><loc>https://scifaro.com/en/abs/foolhd-fooling-speaker-identification-by-highly-imperceptible-adversarial-disturbances-2011.08483</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foolhd-fooling-speaker-identification-by-highly-imperceptible-adversarial-disturbances-2011.08483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foolhd-fooling-speaker-identification-by-highly-imperceptible-adversarial-disturbances-2011.08483"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-voice-conversion-network-with-cycle-consistency-loss-of-speaker-identity-2011.08548</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-voice-conversion-network-with-cycle-consistency-loss-of-speaker-identity-2011.08548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-voice-conversion-network-with-cycle-consistency-loss-of-speaker-identity-2011.08548"/></url>
<url><loc>https://scifaro.com/en/abs/accent-and-speaker-disentanglement-in-many-to-many-voice-conversion-2011.08609</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-and-speaker-disentanglement-in-many-to-many-voice-conversion-2011.08609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-and-speaker-disentanglement-in-many-to-many-voice-conversion-2011.08609"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-training-for-multi-domain-speaker-recognition-2011.08623</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-training-for-multi-domain-speaker-recognition-2011.08623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-training-for-multi-domain-speaker-recognition-2011.08623"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-emotion-transfer-for-end-to-end-speech-synthesis-2011.08679</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-emotion-transfer-for-end-to-end-speech-synthesis-2011.08679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-emotion-transfer-for-end-to-end-speech-synthesis-2011.08679"/></url>
<url><loc>https://scifaro.com/en/abs/vertical-horizontal-structured-attention-for-generating-music-with-chords-2011.09078</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vertical-horizontal-structured-attention-for-generating-music-with-chords-2011.09078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vertical-horizontal-structured-attention-for-generating-music-with-chords-2011.09078"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-automatic-speech-recognition-using-deep-complex-unet-2011.09081</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-automatic-speech-recognition-using-deep-complex-unet-2011.09081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-automatic-speech-recognition-using-deep-complex-unet-2011.09081"/></url>
<url><loc>https://scifaro.com/en/abs/expanding-access-to-music-technology-rapid-prototyping-accessible-instrument-solutions-for-musicians-with-intellectual-disabilities-2011.09143</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expanding-access-to-music-technology-rapid-prototyping-accessible-instrument-solutions-for-musicians-with-intellectual-disabilities-2011.09143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expanding-access-to-music-technology-rapid-prototyping-accessible-instrument-solutions-for-musicians-with-intellectual-disabilities-2011.09143"/></url>
<url><loc>https://scifaro.com/en/abs/caa-net-conditional-atrous-cnns-with-attention-for-explainable-device-robust-acoustic-scene-classification-2011.09299</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/caa-net-conditional-atrous-cnns-with-attention-for-explainable-device-robust-acoustic-scene-classification-2011.09299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/caa-net-conditional-atrous-cnns-with-attention-for-explainable-device-robust-acoustic-scene-classification-2011.09299"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-rnnlm-rescoring-for-conversational-speech-recognition-2011.09301</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-rnnlm-rescoring-for-conversational-speech-recognition-2011.09301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-rnnlm-rescoring-for-conversational-speech-recognition-2011.09301"/></url>
<url><loc>https://scifaro.com/en/abs/deep-residual-local-feature-learning-for-speech-emotion-recognition-2011.09767</loc><lastmod>2020-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-residual-local-feature-learning-for-speech-emotion-recognition-2011.09767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-residual-local-feature-learning-for-speech-emotion-recognition-2011.09767"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-learning-for-speech-separation-2011.10233</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-learning-for-speech-separation-2011.10233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-learning-for-speech-separation-2011.10233"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-voice-conversion-based-data-augmentation-in-text-dependent-speaker-verification-2011.10710</loc><lastmod>2020-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-voice-conversion-based-data-augmentation-in-text-dependent-speaker-verification-2011.10710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-voice-conversion-based-data-augmentation-in-text-dependent-speaker-verification-2011.10710"/></url>
<url><loc>https://scifaro.com/en/abs/speech-command-recognition-in-computationally-constrained-environments-with-a-quadratic-self-organized-operational-layer-2011.11436</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-command-recognition-in-computationally-constrained-environments-with-a-quadratic-self-organized-operational-layer-2011.11436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-command-recognition-in-computationally-constrained-environments-with-a-quadratic-self-organized-operational-layer-2011.11436"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-multimodal-music-genre-classifier-using-hierarchical-attention-and-convolutional-neural-network-2011.11970</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-multimodal-music-genre-classifier-using-hierarchical-attention-and-convolutional-neural-network-2011.11970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-multimodal-music-genre-classifier-using-hierarchical-attention-and-convolutional-neural-network-2011.11970"/></url>
<url><loc>https://scifaro.com/en/abs/multi-decoder-dprnn-high-accuracy-source-counting-and-separation-2011.12022</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-decoder-dprnn-high-accuracy-source-counting-and-separation-2011.12022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-decoder-dprnn-high-accuracy-source-counting-and-separation-2011.12022"/></url>
<url><loc>https://scifaro.com/en/abs/deep-discriminative-feature-learning-for-accent-recognition-2011.12461</loc><lastmod>2021-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-discriminative-feature-learning-for-accent-recognition-2011.12461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-discriminative-feature-learning-for-accent-recognition-2011.12461"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-tract-length-perturbation-for-text-dependent-speaker-verification-with-autoregressive-prediction-coding-2011.12536</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-tract-length-perturbation-for-text-dependent-speaker-verification-with-autoregressive-prediction-coding-2011.12536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-tract-length-perturbation-for-text-dependent-speaker-verification-with-autoregressive-prediction-coding-2011.12536"/></url>
<url><loc>https://scifaro.com/en/abs/mtcrnn-a-multi-scale-rnn-for-directed-audio-texture-synthesis-2011.12596</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mtcrnn-a-multi-scale-rnn-for-directed-audio-texture-synthesis-2011.12596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mtcrnn-a-multi-scale-rnn-for-directed-audio-texture-synthesis-2011.12596"/></url>
<url><loc>https://scifaro.com/en/abs/feature-selection-based-on-principal-component-analysis-for-underwater-source-localization-by-deep-learning-2011.12754</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-selection-based-on-principal-component-analysis-for-underwater-source-localization-by-deep-learning-2011.12754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-selection-based-on-principal-component-analysis-for-underwater-source-localization-by-deep-learning-2011.12754"/></url>
<url><loc>https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-application-to-audio-signal-recovery-2011.12818</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-application-to-audio-signal-recovery-2011.12818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-retrieval-with-bregman-divergences-application-to-audio-signal-recovery-2011.12818"/></url>
<url><loc>https://scifaro.com/en/abs/mask-net-learning-context-aware-invariant-features-using-adversarial-forgetting-student-abstract-2011.12979</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-net-learning-context-aware-invariant-features-using-adversarial-forgetting-student-abstract-2011.12979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-net-learning-context-aware-invariant-features-using-adversarial-forgetting-student-abstract-2011.12979"/></url>
<url><loc>https://scifaro.com/en/abs/fbwave-efficient-and-scalable-neural-vocoders-for-streaming-text-to-speech-on-the-edge-2011.12985</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fbwave-efficient-and-scalable-neural-vocoders-for-streaming-text-to-speech-on-the-edge-2011.12985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fbwave-efficient-and-scalable-neural-vocoders-for-streaming-text-to-speech-on-the-edge-2011.12985"/></url>
<url><loc>https://scifaro.com/en/abs/can-gan-originate-new-electronic-dance-music-genres-generating-novel-rhythm-patterns-using-gan-with-genre-ambiguity-loss-2011.13062</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-gan-originate-new-electronic-dance-music-genres-generating-novel-rhythm-patterns-using-gan-with-genre-ambiguity-loss-2011.13062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-gan-originate-new-electronic-dance-music-genres-generating-novel-rhythm-patterns-using-gan-with-genre-ambiguity-loss-2011.13062"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-error-correction-and-performance-aid-for-midi-instruments-2011.13122</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-error-correction-and-performance-aid-for-midi-instruments-2011.13122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-error-correction-and-performance-aid-for-midi-instruments-2011.13122"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-end-to-end-multi-talker-speech-recognition-2011.13148</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-end-to-end-multi-talker-speech-recognition-2011.13148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-end-to-end-multi-talker-speech-recognition-2011.13148"/></url>
<url><loc>https://scifaro.com/en/abs/virufy-global-applicability-of-crowdsourced-and-clinical-datasets-for-ai-detection-of-covid-19-from-cough-2011.13320</loc><lastmod>2021-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/virufy-global-applicability-of-crowdsourced-and-clinical-datasets-for-ai-detection-of-covid-19-from-cough-2011.13320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/virufy-global-applicability-of-crowdsourced-and-clinical-datasets-for-ai-detection-of-covid-19-from-cough-2011.13320"/></url>
<url><loc>https://scifaro.com/en/abs/improving-rnn-transducer-with-target-speaker-extraction-and-neural-uncertainty-estimation-2011.13393</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-rnn-transducer-with-target-speaker-extraction-and-neural-uncertainty-estimation-2011.13393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-rnn-transducer-with-target-speaker-extraction-and-neural-uncertainty-estimation-2011.13393"/></url>
<url><loc>https://scifaro.com/en/abs/towards-movement-generation-with-audio-features-2011.13453</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-movement-generation-with-audio-features-2011.13453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-movement-generation-with-audio-features-2011.13453"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-handcrafted-parameterized-and-learnable-features-for-speech-separation-2011.14295</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-handcrafted-parameterized-and-learnable-features-for-speech-separation-2011.14295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-handcrafted-parameterized-and-learnable-features-for-speech-separation-2011.14295"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-separation-with-adversarially-disentangled-visual-representation-2011.14334</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-separation-with-adversarially-disentangled-visual-representation-2011.14334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-separation-with-adversarially-disentangled-visual-representation-2011.14334"/></url>
<url><loc>https://scifaro.com/en/abs/an-features-extraction-and-recognition-method-for-underwater-acoustic-target-based-on-atcnn-2011.14336</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-features-extraction-and-recognition-method-for-underwater-acoustic-target-based-on-atcnn-2011.14336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-features-extraction-and-recognition-method-for-underwater-acoustic-target-based-on-atcnn-2011.14336"/></url>
<url><loc>https://scifaro.com/en/abs/audio-speech-language-signal-processing-for-covid-19-a-comprehensive-overview-2011.14445</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-speech-language-signal-processing-for-covid-19-a-comprehensive-overview-2011.14445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-speech-language-signal-processing-for-covid-19-a-comprehensive-overview-2011.14445"/></url>
<url><loc>https://scifaro.com/en/abs/look-who-s-not-talking-2011.14885</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/look-who-s-not-talking-2011.14885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/look-who-s-not-talking-2011.14885"/></url>
<url><loc>https://scifaro.com/en/abs/convolutive-transfer-function-invariant-sdr-training-criteria-for-multi-channel-reverberant-speech-separation-2011.15003</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutive-transfer-function-invariant-sdr-training-criteria-for-multi-channel-reverberant-speech-separation-2011.15003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutive-transfer-function-invariant-sdr-training-criteria-for-multi-channel-reverberant-speech-separation-2011.15003"/></url>
<url><loc>https://scifaro.com/en/abs/strike-on-stage-a-percussion-and-media-performance-2012.00250</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/strike-on-stage-a-percussion-and-media-performance-2012.00250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/strike-on-stage-a-percussion-and-media-performance-2012.00250"/></url>
<url><loc>https://scifaro.com/en/abs/performing-with-a-mobile-computer-system-for-vibraphone-2012.00265</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performing-with-a-mobile-computer-system-for-vibraphone-2012.00265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performing-with-a-mobile-computer-system-for-vibraphone-2012.00265"/></url>
<url><loc>https://scifaro.com/en/abs/musictm-dataset-for-joint-representation-learning-among-sheet-music-lyrics-and-musical-audio-2012.00290</loc><lastmod>2021-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musictm-dataset-for-joint-representation-learning-among-sheet-music-lyrics-and-musical-audio-2012.00290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musictm-dataset-for-joint-representation-learning-among-sheet-music-lyrics-and-musical-audio-2012.00290"/></url>
<url><loc>https://scifaro.com/en/abs/nhss-a-speech-and-singing-parallel-database-2012.00337</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nhss-a-speech-and-singing-parallel-database-2012.00337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nhss-a-speech-and-singing-parallel-database-2012.00337"/></url>
<url><loc>https://scifaro.com/en/abs/deep-ad-hoc-beamforming-based-on-speaker-extraction-for-target-dependent-speech-separation-2012.00403</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-ad-hoc-beamforming-based-on-speaker-extraction-for-target-dependent-speech-separation-2012.00403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-ad-hoc-beamforming-based-on-speaker-extraction-for-target-dependent-speech-separation-2012.00403"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-generation-using-deep-recurrent-networks-and-embeddings-a-study-case-in-music-2012.01231</loc><lastmod>2020-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-generation-using-deep-recurrent-networks-and-embeddings-a-study-case-in-music-2012.01231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-generation-using-deep-recurrent-networks-and-embeddings-a-study-case-in-music-2012.01231"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-spatial-clustering-based-time-frequency-masks-using-lstm-neural-networks-2012.01576</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-spatial-clustering-based-time-frequency-masks-using-lstm-neural-networks-2012.01576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-spatial-clustering-based-time-frequency-masks-using-lstm-neural-networks-2012.01576"/></url>
<url><loc>https://scifaro.com/en/abs/melglow-efficient-waveform-generative-network-based-on-location-variable-convolution-2012.01684</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melglow-efficient-waveform-generative-network-based-on-location-variable-convolution-2012.01684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melglow-efficient-waveform-generative-network-based-on-location-variable-convolution-2012.01684"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-posteriorgrams-based-many-to-many-singing-voice-conversion-via-adversarial-training-2012.01837</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-posteriorgrams-based-many-to-many-singing-voice-conversion-via-adversarial-training-2012.01837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-posteriorgrams-based-many-to-many-singing-voice-conversion-via-adversarial-training-2012.01837"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-cough-classification-using-machine-learning-and-global-smartphone-recordings-2012.01926</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-cough-classification-using-machine-learning-and-global-smartphone-recordings-2012.01926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-cough-classification-using-machine-learning-and-global-smartphone-recordings-2012.01926"/></url>
<url><loc>https://scifaro.com/en/abs/improved-mvdr-beamforming-using-lstm-speech-models-to-clean-spatial-clustering-masks-2012.02191</loc><lastmod>2020-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-mvdr-beamforming-using-lstm-speech-models-to-clean-spatial-clustering-masks-2012.02191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-mvdr-beamforming-using-lstm-speech-models-to-clean-spatial-clustering-masks-2012.02191"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-hologram-optimisation-using-automatic-differentiation-2012.02431</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-hologram-optimisation-using-automatic-differentiation-2012.02431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-hologram-optimisation-using-automatic-differentiation-2012.02431"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-emotions-perceived-from-sounds-2012.02643</loc><lastmod>2020-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-emotions-perceived-from-sounds-2012.02643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-emotions-perceived-from-sounds-2012.02643"/></url>
<url><loc>https://scifaro.com/en/abs/guitar-effects-recognition-and-parameter-estimation-with-convolutional-neural-networks-2012.03216</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guitar-effects-recognition-and-parameter-estimation-with-convolutional-neural-networks-2012.03216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guitar-effects-recognition-and-parameter-estimation-with-convolutional-neural-networks-2012.03216"/></url>
<url><loc>https://scifaro.com/en/abs/source-separation-and-depthwise-separable-convolutions-for-computer-audition-2012.03359</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-separation-and-depthwise-separable-convolutions-for-computer-audition-2012.03359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-separation-and-depthwise-separable-convolutions-for-computer-audition-2012.03359"/></url>
<url><loc>https://scifaro.com/en/abs/combining-spatial-clustering-with-lstm-speech-models-for-multichannel-speech-enhancement-2012.03388</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-spatial-clustering-with-lstm-speech-models-for-multichannel-speech-enhancement-2012.03388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-spatial-clustering-with-lstm-speech-models-for-multichannel-speech-enhancement-2012.03388"/></url>
<url><loc>https://scifaro.com/en/abs/multi-instrumentalist-net-unsupervised-generation-of-music-from-body-movements-2012.03478</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-instrumentalist-net-unsupervised-generation-of-music-from-body-movements-2012.03478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-instrumentalist-net-unsupervised-generation-of-music-from-body-movements-2012.03478"/></url>
<url><loc>https://scifaro.com/en/abs/reverberant-sound-localization-with-a-robot-head-based-on-direct-path-relative-transfer-function-2012.03574</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberant-sound-localization-with-a-robot-head-based-on-direct-path-relative-transfer-function-2012.03574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberant-sound-localization-with-a-robot-head-based-on-direct-path-relative-transfer-function-2012.03574"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-dataset-for-the-identification-of-computer-generated-melodies-in-the-csmt-challenge-2012.03646</loc><lastmod>2021-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-dataset-for-the-identification-of-computer-generated-melodies-in-the-csmt-challenge-2012.03646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-dataset-for-the-identification-of-computer-generated-melodies-in-the-csmt-challenge-2012.03646"/></url>
<url><loc>https://scifaro.com/en/abs/triplet-entropy-loss-improving-the-generalisation-of-short-speech-language-identification-systems-2012.03775</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/triplet-entropy-loss-improving-the-generalisation-of-short-speech-language-identification-systems-2012.03775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/triplet-entropy-loss-improving-the-generalisation-of-short-speech-language-identification-systems-2012.03775"/></url>
<url><loc>https://scifaro.com/en/abs/diverse-melody-generation-from-chinese-lyrics-via-mutual-information-maximization-2012.03805</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diverse-melody-generation-from-chinese-lyrics-via-mutual-information-maximization-2012.03805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diverse-melody-generation-from-chinese-lyrics-via-mutual-information-maximization-2012.03805"/></url>
<url><loc>https://scifaro.com/en/abs/a-geometric-framework-for-pitch-estimation-on-acoustic-musical-signals-2012.04517</loc><lastmod>2020-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-geometric-framework-for-pitch-estimation-on-acoustic-musical-signals-2012.04517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-geometric-framework-for-pitch-estimation-on-acoustic-musical-signals-2012.04517"/></url>
<url><loc>https://scifaro.com/en/abs/i-m-sorry-for-your-loss-spectrally-based-audio-distances-are-bad-at-pitch-2012.04572</loc><lastmod>2020-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-m-sorry-for-your-loss-spectrally-based-audio-distances-are-bad-at-pitch-2012.04572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-m-sorry-for-your-loss-spectrally-based-audio-distances-are-bad-at-pitch-2012.04572"/></url>
<url><loc>https://scifaro.com/en/abs/recent-advances-in-computer-audition-for-diagnosing-covid-19-an-overview-2012.04650</loc><lastmod>2020-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-advances-in-computer-audition-for-diagnosing-covid-19-an-overview-2012.04650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-advances-in-computer-audition-for-diagnosing-covid-19-an-overview-2012.04650"/></url>
<url><loc>https://scifaro.com/en/abs/deeptalk-vocal-style-encoding-for-speaker-recognition-and-speech-synthesis-2012.05084</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deeptalk-vocal-style-encoding-for-speaker-recognition-and-speech-synthesis-2012.05084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deeptalk-vocal-style-encoding-for-speaker-recognition-and-speech-synthesis-2012.05084"/></url>
<url><loc>https://scifaro.com/en/abs/songmass-automatic-song-writing-with-pre-training-and-alignment-constraint-2012.05168</loc><lastmod>2020-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songmass-automatic-song-writing-with-pre-training-and-alignment-constraint-2012.05168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songmass-automatic-song-writing-with-pre-training-and-alignment-constraint-2012.05168"/></url>
<url><loc>https://scifaro.com/en/abs/unified-streaming-and-non-streaming-two-pass-end-to-end-model-for-speech-recognition-2012.05481</loc><lastmod>2021-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-streaming-and-non-streaming-two-pass-end-to-end-model-for-speech-recognition-2012.05481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-streaming-and-non-streaming-two-pass-end-to-end-model-for-speech-recognition-2012.05481"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-wav2vec-2-0-on-speaker-verification-and-language-identification-2012.06185</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-wav2vec-2-0-on-speaker-verification-and-language-identification-2012.06185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-wav2vec-2-0-on-speaker-verification-and-language-identification-2012.06185"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-feature-representations-for-anomalous-sound-detection-2012.06282</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-feature-representations-for-anomalous-sound-detection-2012.06282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-feature-representations-for-anomalous-sound-detection-2012.06282"/></url>
<url><loc>https://scifaro.com/en/abs/voxsrc-2020-the-second-voxceleb-speaker-recognition-challenge-2012.06867</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxsrc-2020-the-second-voxceleb-speaker-recognition-challenge-2012.06867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxsrc-2020-the-second-voxceleb-speaker-recognition-challenge-2012.06867"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-classification-of-rare-chords-with-unlabeled-data-2012.07055</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-classification-of-rare-chords-with-unlabeled-data-2012.07055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-classification-of-rare-chords-with-unlabeled-data-2012.07055"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-als-patients-based-on-acoustic-analysis-of-sustained-vowel-phonations-2012.07347</loc><lastmod>2021-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-als-patients-based-on-acoustic-analysis-of-sustained-vowel-phonations-2012.07347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-als-patients-based-on-acoustic-analysis-of-sustained-vowel-phonations-2012.07347"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-learning-for-deep-neural-network-adaptation-2012.07460</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-learning-for-deep-neural-network-adaptation-2012.07460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-learning-for-deep-neural-network-adaptation-2012.07460"/></url>
<url><loc>https://scifaro.com/en/abs/the-voice-of-covid-19-acoustic-correlates-of-infection-2012.09478</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voice-of-covid-19-acoustic-correlates-of-infection-2012.09478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voice-of-covid-19-acoustic-correlates-of-infection-2012.09478"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-source-localization-and-spectra-generation-from-sparse-beamforming-maps-2012.09643</loc><lastmod>2021-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-source-localization-and-spectra-generation-from-sparse-beamforming-maps-2012.09643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-source-localization-and-spectra-generation-from-sparse-beamforming-maps-2012.09643"/></url>
<url><loc>https://scifaro.com/en/abs/non-uniform-fir-digital-filter-bank-for-hearing-aid-application-using-frequency-response-masking-technique-a-review-2012.10663</loc><lastmod>2020-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-uniform-fir-digital-filter-bank-for-hearing-aid-application-using-frequency-response-masking-technique-a-review-2012.10663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-uniform-fir-digital-filter-bank-for-hearing-aid-application-using-frequency-response-masking-technique-a-review-2012.10663"/></url>
<url><loc>https://scifaro.com/en/abs/adjust-free-adversarial-example-generation-in-speech-recognition-using-evolutionary-multi-objective-optimization-under-black-box-condition-2012.11138</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adjust-free-adversarial-example-generation-in-speech-recognition-using-evolutionary-multi-objective-optimization-under-black-box-condition-2012.11138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adjust-free-adversarial-example-generation-in-speech-recognition-using-evolutionary-multi-objective-optimization-under-black-box-condition-2012.11138"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stream-convolutional-neural-network-with-frequency-selection-for-robust-speaker-verification-2012.11159</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stream-convolutional-neural-network-with-frequency-selection-for-robust-speaker-verification-2012.11159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stream-convolutional-neural-network-with-frequency-selection-for-robust-speaker-verification-2012.11159"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effectiveness-of-signal-decomposition-feature-extraction-and-selection-on-lung-sound-classification-2012.11759</loc><lastmod>2020-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effectiveness-of-signal-decomposition-feature-extraction-and-selection-on-lung-sound-classification-2012.11759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effectiveness-of-signal-decomposition-feature-extraction-and-selection-on-lung-sound-classification-2012.11759"/></url>
<url><loc>https://scifaro.com/en/abs/cn-celeb-multi-genre-speaker-recognition-2012.12468</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cn-celeb-multi-genre-speaker-recognition-2012.12468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cn-celeb-multi-genre-speaker-recognition-2012.12468"/></url>
<url><loc>https://scifaro.com/en/abs/a-principle-solution-for-enroll-test-mismatch-in-speaker-recognition-2012.12471</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-principle-solution-for-enroll-test-mismatch-in-speaker-recognition-2012.12471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-principle-solution-for-enroll-test-mismatch-in-speaker-recognition-2012.12471"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-text-to-speech-synthesis-using-pseudo-lookahead-with-large-pretrained-language-model-2012.12612</loc><lastmod>2021-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-text-to-speech-synthesis-using-pseudo-lookahead-with-large-pretrained-language-model-2012.12612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-text-to-speech-synthesis-using-pseudo-lookahead-with-large-pretrained-language-model-2012.12612"/></url>
<url><loc>https://scifaro.com/en/abs/inception-based-network-and-multi-spectrogram-ensemble-applied-for-predicting-respiratory-anomalies-and-lung-diseases-2012.13699</loc><lastmod>2020-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inception-based-network-and-multi-spectrogram-ensemble-applied-for-predicting-respiratory-anomalies-and-lung-diseases-2012.13699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inception-based-network-and-multi-spectrogram-ensemble-applied-for-predicting-respiratory-anomalies-and-lung-diseases-2012.13699"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-covid-19-from-breathing-and-coughing-sounds-using-deep-neural-networks-2012.14553</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-covid-19-from-breathing-and-coughing-sounds-using-deep-neural-networks-2012.14553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-covid-19-from-breathing-and-coughing-sounds-using-deep-neural-networks-2012.14553"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-audio-recognition-a-supervised-dictionary-approach-2012.14761</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-audio-recognition-a-supervised-dictionary-approach-2012.14761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-audio-recognition-a-supervised-dictionary-approach-2012.14761"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-temporal-alignment-for-non-parallel-articulatory-to-acoustic-speech-synthesis-2012.15184</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-temporal-alignment-for-non-parallel-articulatory-to-acoustic-speech-synthesis-2012.15184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-temporal-alignment-for-non-parallel-articulatory-to-acoustic-speech-synthesis-2012.15184"/></url>
<url><loc>https://scifaro.com/en/abs/unified-mandarin-tts-front-end-based-on-distilled-bert-model-2012.15404</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-mandarin-tts-front-end-based-on-distilled-bert-model-2012.15404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-mandarin-tts-front-end-based-on-distilled-bert-model-2012.15404"/></url>
<url><loc>https://scifaro.com/en/abs/efficientnet-absolute-zero-for-continuous-speech-keyword-spotting-2012.15695</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficientnet-absolute-zero-for-continuous-speech-keyword-spotting-2012.15695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficientnet-absolute-zero-for-continuous-speech-keyword-spotting-2012.15695"/></url>
<url><loc>https://scifaro.com/en/abs/psychoacoustic-calibration-of-loss-functions-for-efficient-end-to-end-neural-audio-coding-2101.00054</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psychoacoustic-calibration-of-loss-functions-for-efficient-end-to-end-neural-audio-coding-2101.00054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psychoacoustic-calibration-of-loss-functions-for-efficient-end-to-end-neural-audio-coding-2101.00054"/></url>
<url><loc>https://scifaro.com/en/abs/generative-deep-learning-for-virtuosic-classical-music-generative-adversarial-networks-as-renowned-composers-2101.00169</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-deep-learning-for-virtuosic-classical-music-generative-adversarial-networks-as-renowned-composers-2101.00169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-deep-learning-for-virtuosic-classical-music-generative-adversarial-networks-as-renowned-composers-2101.00169"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-deep-reinforcement-learning-for-audio-based-applications-2101.00240</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-deep-reinforcement-learning-for-audio-based-applications-2101.00240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-deep-reinforcement-learning-for-audio-based-applications-2101.00240"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-unsupervised-domain-adaptation-for-harmonic-percussive-source-separation-2101.00701</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-unsupervised-domain-adaptation-for-harmonic-percussive-source-separation-2101.00701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-unsupervised-domain-adaptation-for-harmonic-percussive-source-separation-2101.00701"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-policy-for-pre-trained-deep-reinforcement-learning-for-speech-emotion-recognition-2101.00738</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-policy-for-pre-trained-deep-reinforcement-learning-for-speech-emotion-recognition-2101.00738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-policy-for-pre-trained-deep-reinforcement-learning-for-speech-emotion-recognition-2101.00738"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-spatio-temporal-rnn-beamformer-for-target-speech-separation-2101.01280</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-spatio-temporal-rnn-beamformer-for-target-speech-separation-2101.01280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-spatio-temporal-rnn-beamformer-for-target-speech-separation-2101.01280"/></url>
<url><loc>https://scifaro.com/en/abs/development-of-a-respiratory-sound-labeling-software-for-training-a-deep-learning-based-respiratory-sound-analysis-model-2101.01352</loc><lastmod>2021-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-of-a-respiratory-sound-labeling-software-for-training-a-deep-learning-based-respiratory-sound-analysis-model-2101.01352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-of-a-respiratory-sound-labeling-software-for-training-a-deep-learning-based-respiratory-sound-analysis-model-2101.01352"/></url>
<url><loc>https://scifaro.com/en/abs/fixed-maml-for-few-shot-classification-in-multilingual-speech-emotion-recognition-2101.01356</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fixed-maml-for-few-shot-classification-in-multilingual-speech-emotion-recognition-2101.01356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fixed-maml-for-few-shot-classification-in-multilingual-speech-emotion-recognition-2101.01356"/></url>
<url><loc>https://scifaro.com/en/abs/hypothesis-stitcher-for-end-to-end-speaker-attributed-asr-on-long-form-multi-talker-recordings-2101.01853</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hypothesis-stitcher-for-end-to-end-speaker-attributed-asr-on-long-form-multi-talker-recordings-2101.01853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hypothesis-stitcher-for-end-to-end-speaker-attributed-asr-on-long-form-multi-talker-recordings-2101.01853"/></url>
<url><loc>https://scifaro.com/en/abs/environment-transfer-for-distributed-systems-2101.01863</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environment-transfer-for-distributed-systems-2101.01863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environment-transfer-for-distributed-systems-2101.01863"/></url>
<url><loc>https://scifaro.com/en/abs/interspeech-2021-deep-noise-suppression-challenge-2101.01902</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interspeech-2021-deep-noise-suppression-challenge-2101.01902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interspeech-2021-deep-noise-suppression-challenge-2101.01902"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-crnn-for-speaker-counting-an-analysis-of-performance-2101.01977</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-crnn-for-speaker-counting-an-analysis-of-performance-2101.01977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-crnn-for-speaker-counting-an-analysis-of-performance-2101.01977"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-efficacy-of-music-version-retrieval-systems-for-setlist-identification-2101.02098</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-efficacy-of-music-version-retrieval-systems-for-setlist-identification-2101.02098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-efficacy-of-music-version-retrieval-systems-for-setlist-identification-2101.02098"/></url>
<url><loc>https://scifaro.com/en/abs/compound-word-transformer-learning-to-compose-full-song-music-over-dynamic-directed-hypergraphs-2101.02402</loc><lastmod>2021-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compound-word-transformer-learning-to-compose-full-song-music-over-dynamic-directed-hypergraphs-2101.02402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compound-word-transformer-learning-to-compose-full-song-music-over-dynamic-directed-hypergraphs-2101.02402"/></url>
<url><loc>https://scifaro.com/en/abs/a-four-stage-data-augmentation-approach-to-resnet-conformer-based-acoustic-modeling-for-sound-event-localization-and-detection-2101.02919</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-four-stage-data-augmentation-approach-to-resnet-conformer-based-acoustic-modeling-for-sound-event-localization-and-detection-2101.02919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-four-stage-data-augmentation-approach-to-resnet-conformer-based-acoustic-modeling-for-sound-event-localization-and-detection-2101.02919"/></url>
<url><loc>https://scifaro.com/en/abs/practical-speech-re-use-prevention-in-voice-driven-services-2101.04773</loc><lastmod>2021-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/practical-speech-re-use-prevention-in-voice-driven-services-2101.04773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/practical-speech-re-use-prevention-in-voice-driven-services-2101.04773"/></url>
<url><loc>https://scifaro.com/en/abs/mp3net-coherent-minute-long-music-generation-from-raw-audio-with-a-simple-convolutional-gan-2101.04785</loc><lastmod>2021-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mp3net-coherent-minute-long-music-generation-from-raw-audio-with-a-simple-convolutional-gan-2101.04785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mp3net-coherent-minute-long-music-generation-from-raw-audio-with-a-simple-convolutional-gan-2101.04785"/></url>
<url><loc>https://scifaro.com/en/abs/deep-attention-based-representation-learning-for-heart-sound-classification-2101.04979</loc><lastmod>2021-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-attention-based-representation-learning-for-heart-sound-classification-2101.04979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-attention-based-representation-learning-for-heart-sound-classification-2101.04979"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-height-and-age-estimation-using-attention-mechanism-with-lstm-rnn-2101.05056</loc><lastmod>2021-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-height-and-age-estimation-using-attention-mechanism-with-lstm-rnn-2101.05056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-height-and-age-estimation-using-attention-mechanism-with-lstm-rnn-2101.05056"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-heart-abnormality-detection-based-on-phonocardiogram-analysis-with-beta-variational-auto-encoders-2101.05443</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-heart-abnormality-detection-based-on-phonocardiogram-analysis-with-beta-variational-auto-encoders-2101.05443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-heart-abnormality-detection-based-on-phonocardiogram-analysis-with-beta-variational-auto-encoders-2101.05443"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-volume-multichannel-nonnegative-matrix-factorization-for-blind-source-separation-2101.06398</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-volume-multichannel-nonnegative-matrix-factorization-for-blind-source-separation-2101.06398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-volume-multichannel-nonnegative-matrix-factorization-for-blind-source-separation-2101.06398"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-disentangled-representation-learning-for-singing-voice-conversion-2101.06842</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-disentangled-representation-learning-for-singing-voice-conversion-2101.06842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-disentangled-representation-learning-for-singing-voice-conversion-2101.06842"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-to-compare-music-generative-models-using-automatic-evaluation-metrics-extended-to-rhythm-2101.07669</loc><lastmod>2021-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-to-compare-music-generative-models-using-automatic-evaluation-metrics-extended-to-rhythm-2101.07669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-to-compare-music-generative-models-using-automatic-evaluation-metrics-extended-to-rhythm-2101.07669"/></url>
<url><loc>https://scifaro.com/en/abs/towards-duration-robust-weakly-supervised-sound-event-detection-2101.07687</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-duration-robust-weakly-supervised-sound-event-detection-2101.07687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-duration-robust-weakly-supervised-sound-event-detection-2101.07687"/></url>
<url><loc>https://scifaro.com/en/abs/the-diagnosis-of-asthma-using-hilbert-huang-transform-and-deep-learning-on-lung-sounds-2101.08288</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-diagnosis-of-asthma-using-hilbert-huang-transform-and-deep-learning-on-lung-sounds-2101.08288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-diagnosis-of-asthma-using-hilbert-huang-transform-and-deep-learning-on-lung-sounds-2101.08288"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-deep-learning-feature-inference-techniques-on-respiratory-sounds-2101.08438</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-deep-learning-feature-inference-techniques-on-respiratory-sounds-2101.08438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-deep-learning-feature-inference-techniques-on-respiratory-sounds-2101.08438"/></url>
<url><loc>https://scifaro.com/en/abs/turkish-voice-commands-based-chess-game-using-gammatone-cepstral-coefficients-2101.08441</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/turkish-voice-commands-based-chess-game-using-gammatone-cepstral-coefficients-2101.08441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/turkish-voice-commands-based-chess-game-using-gammatone-cepstral-coefficients-2101.08441"/></url>
<url><loc>https://scifaro.com/en/abs/online-streaming-end-to-end-neural-diarization-handling-overlapping-speech-and-flexible-numbers-of-speakers-2101.08473</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-streaming-end-to-end-neural-diarization-handling-overlapping-speech-and-flexible-numbers-of-speakers-2101.08473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-streaming-end-to-end-neural-diarization-handling-overlapping-speech-and-flexible-numbers-of-speakers-2101.08473"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-window-size-for-detection-of-abnormalities-in-respiratory-sounds-2101.08495</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-window-size-for-detection-of-abnormalities-in-respiratory-sounds-2101.08495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-window-size-for-detection-of-abnormalities-in-respiratory-sounds-2101.08495"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-diagonalization-based-efficient-approach-to-underdetermined-blind-audio-source-separation-using-the-multichannel-wiener-filter-2101.08563</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-diagonalization-based-efficient-approach-to-underdetermined-blind-audio-source-separation-using-the-multichannel-wiener-filter-2101.08563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-diagonalization-based-efficient-approach-to-underdetermined-blind-audio-source-separation-using-the-multichannel-wiener-filter-2101.08563"/></url>
<url><loc>https://scifaro.com/en/abs/leaf-a-learnable-frontend-for-audio-classification-2101.08596</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leaf-a-learnable-frontend-for-audio-classification-2101.08596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leaf-a-learnable-frontend-for-audio-classification-2101.08596"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-beam-search-confidence-for-energy-efficient-speech-recognition-2101.09083</loc><lastmod>2021-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-beam-search-confidence-for-energy-efficient-speech-recognition-2101.09083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-beam-search-confidence-for-energy-efficient-speech-recognition-2101.09083"/></url>
<url><loc>https://scifaro.com/en/abs/domain-dependent-speaker-diarization-for-the-third-dihard-challenge-2101.09884</loc><lastmod>2021-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-dependent-speaker-diarization-for-the-third-dihard-challenge-2101.09884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-dependent-speaker-diarization-for-the-third-dihard-challenge-2101.09884"/></url>
<url><loc>https://scifaro.com/en/abs/using-angle-of-arrival-for-improving-indoor-localization-2101.09904</loc><lastmod>2021-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-angle-of-arrival-for-improving-indoor-localization-2101.09904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-angle-of-arrival-for-improving-indoor-localization-2101.09904"/></url>
<url><loc>https://scifaro.com/en/abs/novel-recording-studio-features-for-music-information-retrieval-2101.10201</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-recording-studio-features-for-music-information-retrieval-2101.10201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-recording-studio-features-for-music-information-retrieval-2101.10201"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-vocoding-design-with-signal-processing-for-speech-synthesis-and-voice-conversion-2101.10278</loc><lastmod>2021-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-vocoding-design-with-signal-processing-for-speech-synthesis-and-voice-conversion-2101.10278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-vocoding-design-with-signal-processing-for-speech-synthesis-and-voice-conversion-2101.10278"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-neural-voice-cloning-2102.00151</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-neural-voice-cloning-2102.00151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-neural-voice-cloning-2102.00151"/></url>
<url><loc>https://scifaro.com/en/abs/melon-playlist-dataset-a-public-dataset-for-audio-based-playlist-generation-and-music-tagging-2102.00201</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melon-playlist-dataset-a-public-dataset-for-audio-based-playlist-generation-and-music-tagging-2102.00201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melon-playlist-dataset-a-public-dataset-for-audio-based-playlist-generation-and-music-tagging-2102.00201"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-by-simply-fine-tuning-bert-2102.00291</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-by-simply-fine-tuning-bert-2102.00291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-by-simply-fine-tuning-bert-2102.00291"/></url>
<url><loc>https://scifaro.com/en/abs/cortical-features-for-defense-against-adversarial-audio-attacks-2102.00313</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cortical-features-for-defense-against-adversarial-audio-attacks-2102.00313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cortical-features-for-defense-against-adversarial-audio-attacks-2102.00313"/></url>
<url><loc>https://scifaro.com/en/abs/structure-aware-audio-to-score-alignment-using-progressively-dilated-convolutional-neural-networks-2102.00382</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structure-aware-audio-to-score-alignment-using-progressively-dilated-convolutional-neural-networks-2102.00382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structure-aware-audio-to-score-alignment-using-progressively-dilated-convolutional-neural-networks-2102.00382"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-speech-regeneration-with-application-to-speech-enhancement-2102.00429</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-speech-regeneration-with-application-to-speech-enhancement-2102.00429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-speech-regeneration-with-application-to-speech-enhancement-2102.00429"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-the-predictive-accurary-of-singer-identification-using-discrete-wavelet-transform-for-feature-extraction-2102.00550</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-the-predictive-accurary-of-singer-identification-using-discrete-wavelet-transform-for-feature-extraction-2102.00550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-the-predictive-accurary-of-singer-identification-using-discrete-wavelet-transform-for-feature-extraction-2102.00550"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-architectures-to-classify-emotions-in-indian-classical-music-2102.00616</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-architectures-to-classify-emotions-in-indian-classical-music-2102.00616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-architectures-to-classify-emotions-in-indian-classical-music-2102.00616"/></url>
<url><loc>https://scifaro.com/en/abs/rich-prosody-diversity-modelling-with-phone-level-mixture-density-network-2102.00851</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rich-prosody-diversity-modelling-with-phone-level-mixture-density-network-2102.00851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rich-prosody-diversity-modelling-with-phone-level-mixture-density-network-2102.00851"/></url>
<url><loc>https://scifaro.com/en/abs/deep-music-information-dynamics-2102.01133</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-music-information-dynamics-2102.01133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-music-information-dynamics-2102.01133"/></url>
<url><loc>https://scifaro.com/en/abs/psla-improving-audio-tagging-with-pretraining-sampling-labeling-and-aggregation-2102.01243</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psla-improving-audio-tagging-with-pretraining-sampling-labeling-and-aggregation-2102.01243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psla-improving-audio-tagging-with-pretraining-sampling-labeling-and-aggregation-2102.01243"/></url>
<url><loc>https://scifaro.com/en/abs/wenet-production-oriented-streaming-and-non-streaming-end-to-end-speech-recognition-toolkit-2102.01547</loc><lastmod>2021-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wenet-production-oriented-streaming-and-non-streaming-end-to-end-speech-recognition-toolkit-2102.01547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wenet-production-oriented-streaming-and-non-streaming-end-to-end-speech-recognition-toolkit-2102.01547"/></url>
<url><loc>https://scifaro.com/en/abs/speak-with-your-hands-using-continuous-hand-gestures-to-control-articulatory-speech-synthesizer-2102.01640</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-with-your-hands-using-continuous-hand-gestures-to-control-articulatory-speech-synthesizer-2102.01640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-with-your-hands-using-continuous-hand-gestures-to-control-articulatory-speech-synthesizer-2102.01640"/></url>
<url><loc>https://scifaro.com/en/abs/generacion-de-voces-artificiales-infantiles-en-castellano-con-acento-costarricense-2102.01692</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generacion-de-voces-artificiales-infantiles-en-castellano-con-acento-costarricense-2102.01692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generacion-de-voces-artificiales-infantiles-en-castellano-con-acento-costarricense-2102.01692"/></url>
<url><loc>https://scifaro.com/en/abs/lssed-a-large-scale-dataset-and-benchmark-for-speech-emotion-recognition-2102.01754</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lssed-a-large-scale-dataset-and-benchmark-for-speech-emotion-recognition-2102.01754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lssed-a-large-scale-dataset-and-benchmark-for-speech-emotion-recognition-2102.01754"/></url>
<url><loc>https://scifaro.com/en/abs/a-speaker-verification-backend-with-robust-performance-across-conditions-2102.01760</loc><lastmod>2021-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speaker-verification-backend-with-robust-performance-across-conditions-2102.01760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speaker-verification-backend-with-robust-performance-across-conditions-2102.01760"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-with-multiscale-area-attention-and-data-augmentation-2102.01813</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-multiscale-area-attention-and-data-augmentation-2102.01813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-multiscale-area-attention-and-data-augmentation-2102.01813"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-sound-duration-and-inactive-frames-on-sound-event-detection-performance-2102.01927</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-sound-duration-and-inactive-frames-on-sound-event-detection-performance-2102.01927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-sound-duration-and-inactive-frames-on-sound-event-detection-performance-2102.01927"/></url>
<url><loc>https://scifaro.com/en/abs/general-purpose-speech-representation-learning-through-a-self-supervised-multi-granularity-framework-2102.01930</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/general-purpose-speech-representation-learning-through-a-self-supervised-multi-granularity-framework-2102.01930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/general-purpose-speech-representation-learning-through-a-self-supervised-multi-granularity-framework-2102.01930"/></url>
<url><loc>https://scifaro.com/en/abs/towards-natural-and-controllable-cross-lingual-voice-conversion-based-on-neural-tts-model-and-phonetic-posteriorgram-2102.01991</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-natural-and-controllable-cross-lingual-voice-conversion-based-on-neural-tts-model-and-phonetic-posteriorgram-2102.01991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-natural-and-controllable-cross-lingual-voice-conversion-based-on-neural-tts-model-and-phonetic-posteriorgram-2102.01991"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-speech-enhancement-with-complex-convolutional-block-attention-module-and-joint-time-frequency-losses-2102.01993</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-speech-enhancement-with-complex-convolutional-block-attention-module-and-joint-time-frequency-losses-2102.01993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-speech-enhancement-with-complex-convolutional-block-attention-module-and-joint-time-frequency-losses-2102.01993"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-conditioned-on-3d-point-clouds-2102.02028</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-conditioned-on-3d-point-clouds-2102.02028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-conditioned-on-3d-point-clouds-2102.02028"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-structure-inverse-design-and-optimization-using-deep-learning-2102.02063</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-structure-inverse-design-and-optimization-using-deep-learning-2102.02063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-structure-inverse-design-and-optimization-using-deep-learning-2102.02063"/></url>
<url><loc>https://scifaro.com/en/abs/data-generation-using-pass-phrase-dependent-deep-auto-encoders-for-text-dependent-speaker-verification-2102.02074</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-generation-using-pass-phrase-dependent-deep-auto-encoders-for-text-dependent-speaker-verification-2102.02074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-generation-using-pass-phrase-dependent-deep-auto-encoders-for-text-dependent-speaker-verification-2102.02074"/></url>
<url><loc>https://scifaro.com/en/abs/downbeat-tracking-with-tempo-invariant-convolutional-neural-networks-2102.02282</loc><lastmod>2021-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/downbeat-tracking-with-tempo-invariant-convolutional-neural-networks-2102.02282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/downbeat-tracking-with-tempo-invariant-convolutional-neural-networks-2102.02282"/></url>
<url><loc>https://scifaro.com/en/abs/audio-adversarial-examples-attacks-using-vocal-masks-2102.02417</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-adversarial-examples-attacks-using-vocal-masks-2102.02417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-adversarial-examples-attacks-using-vocal-masks-2102.02417"/></url>
<url><loc>https://scifaro.com/en/abs/low-bit-rate-wideband-speech-coding-a-deep-generative-model-based-approach-2102.02640</loc><lastmod>2021-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-bit-rate-wideband-speech-coding-a-deep-generative-model-based-approach-2102.02640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-bit-rate-wideband-speech-coding-a-deep-generative-model-based-approach-2102.02640"/></url>
<url><loc>https://scifaro.com/en/abs/chord-embeddings-analyzing-what-they-capture-and-their-role-for-next-chord-prediction-and-artist-attribute-prediction-2102.02917</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-embeddings-analyzing-what-they-capture-and-their-role-for-next-chord-prediction-and-artist-attribute-prediction-2102.02917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-embeddings-analyzing-what-they-capture-and-their-role-for-next-chord-prediction-and-artist-attribute-prediction-2102.02917"/></url>
<url><loc>https://scifaro.com/en/abs/diversity-robust-acoustic-feature-signatures-based-on-multiscale-fractal-dimension-for-similarity-search-of-environmental-sounds-2102.02964</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diversity-robust-acoustic-feature-signatures-based-on-multiscale-fractal-dimension-for-similarity-search-of-environmental-sounds-2102.02964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diversity-robust-acoustic-feature-signatures-based-on-multiscale-fractal-dimension-for-similarity-search-of-environmental-sounds-2102.02964"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-of-eight-recurrent-neural-network-variants-for-breath-phase-and-adventitious-sound-detection-on-a-self-developed-open-access-lung-sound-database-hf-lung-v1-2102.03049</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-of-eight-recurrent-neural-network-variants-for-breath-phase-and-adventitious-sound-detection-on-a-self-developed-open-access-lung-sound-database-hf-lung-v1-2102.03049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-of-eight-recurrent-neural-network-variants-for-breath-phase-and-adventitious-sound-detection-on-a-self-developed-open-access-lung-sound-database-hf-lung-v1-2102.03049"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-augmentation-and-adaptive-ctc-fusion-for-improved-robustness-of-multi-stream-end-to-end-asr-2102.03055</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-augmentation-and-adaptive-ctc-fusion-for-improved-robustness-of-multi-stream-end-to-end-asr-2102.03055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-augmentation-and-adaptive-ctc-fusion-for-improved-robustness-of-multi-stream-end-to-end-asr-2102.03055"/></url>
<url><loc>https://scifaro.com/en/abs/white-box-audio-vst-effect-programming-2102.03170</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/white-box-audio-vst-effect-programming-2102.03170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/white-box-audio-vst-effect-programming-2102.03170"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-denoising-and-dereverberation-with-tiny-recurrent-u-net-2102.03207</loc><lastmod>2021-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-denoising-and-dereverberation-with-tiny-recurrent-u-net-2102.03207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-denoising-and-dereverberation-with-tiny-recurrent-u-net-2102.03207"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-self-supervised-pre-training-for-music-classification-2102.03229</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-self-supervised-pre-training-for-music-classification-2102.03229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-self-supervised-pre-training-for-music-classification-2102.03229"/></url>
<url><loc>https://scifaro.com/en/abs/u-vectors-generating-clusterable-speaker-embedding-from-unlabeled-data-2102.03868</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-vectors-generating-clusterable-speaker-embedding-from-unlabeled-data-2102.03868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-vectors-generating-clusterable-speaker-embedding-from-unlabeled-data-2102.03868"/></url>
<url><loc>https://scifaro.com/en/abs/extracting-the-auditory-attention-in-a-dual-speaker-scenario-from-eeg-using-a-joint-cnn-lstm-model-2102.03957</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extracting-the-auditory-attention-in-a-dual-speaker-scenario-from-eeg-using-a-joint-cnn-lstm-model-2102.03957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extracting-the-auditory-attention-in-a-dual-speaker-scenario-from-eeg-using-a-joint-cnn-lstm-model-2102.03957"/></url>
<url><loc>https://scifaro.com/en/abs/lightspeech-lightweight-and-fast-text-to-speech-with-neural-architecture-search-2102.04040</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightspeech-lightweight-and-fast-text-to-speech-with-neural-architecture-search-2102.04040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightspeech-lightweight-and-fast-text-to-speech-with-neural-architecture-search-2102.04040"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-direction-inferred-dual-channel-speech-separation-2102.04056</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-direction-inferred-dual-channel-speech-separation-2102.04056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-direction-inferred-dual-channel-speech-separation-2102.04056"/></url>
<url><loc>https://scifaro.com/en/abs/an-update-on-a-progressively-expanded-database-for-automated-lung-sound-analysis-2102.04062</loc><lastmod>2021-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-update-on-a-progressively-expanded-database-for-automated-lung-sound-analysis-2102.04062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-update-on-a-progressively-expanded-database-for-automated-lung-sound-analysis-2102.04062"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-decoupling-magnitude-and-phase-optimization-with-a-two-stage-deep-network-2102.04198</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-decoupling-magnitude-and-phase-optimization-with-a-two-stage-deep-network-2102.04198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-decoupling-magnitude-and-phase-optimization-with-a-two-stage-deep-network-2102.04198"/></url>
<url><loc>https://scifaro.com/en/abs/federated-acoustic-modeling-for-automatic-speech-recognition-2102.04429</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-acoustic-modeling-for-automatic-speech-recognition-2102.04429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-acoustic-modeling-for-automatic-speech-recognition-2102.04429"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-two-dimensional-vocal-tract-acoustic-modeling-based-on-finite-difference-time-domain-methods-2102.04588</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-two-dimensional-vocal-tract-acoustic-modeling-based-on-finite-difference-time-domain-methods-2102.04588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-two-dimensional-vocal-tract-acoustic-modeling-based-on-finite-difference-time-domain-methods-2102.04588"/></url>
<url><loc>https://scifaro.com/en/abs/tr-aumerai-dreaming-music-with-stylegan-2102.04680</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tr-aumerai-dreaming-music-with-stylegan-2102.04680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tr-aumerai-dreaming-music-with-stylegan-2102.04680"/></url>
<url><loc>https://scifaro.com/en/abs/diagnosis-of-covid-19-and-non-covid-19-patients-by-classifying-only-a-single-cough-sound-2102.04880</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diagnosis-of-covid-19-and-non-covid-19-patients-by-classifying-only-a-single-cough-sound-2102.04880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diagnosis-of-covid-19-and-non-covid-19-patients-by-classifying-only-a-single-cough-sound-2102.04880"/></url>
<url><loc>https://scifaro.com/en/abs/on-permutation-invariant-training-for-speech-source-separation-2102.04945</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-permutation-invariant-training-for-speech-source-separation-2102.04945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-permutation-invariant-training-for-speech-source-separation-2102.04945"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audio-augmentation-methods-with-consistency-learning-2102.05151</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audio-augmentation-methods-with-consistency-learning-2102.05151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audio-augmentation-methods-with-consistency-learning-2102.05151"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-automatic-covid-19-diagnosis-via-voice-and-symptoms-from-crowdsourced-data-2102.05225</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-automatic-covid-19-diagnosis-via-voice-and-symptoms-from-crowdsourced-data-2102.05225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-automatic-covid-19-diagnosis-via-voice-and-symptoms-from-crowdsourced-data-2102.05225"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-based-on-curriculum-learning-considering-learning-difficulty-of-events-2102.05288</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-based-on-curriculum-learning-considering-learning-difficulty-of-events-2102.05288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-based-on-curriculum-learning-considering-learning-difficulty-of-events-2102.05288"/></url>
<url><loc>https://scifaro.com/en/abs/voice-cloning-a-multi-speaker-text-to-speech-synthesis-approach-based-on-transfer-learning-2102.05630</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-cloning-a-multi-speaker-text-to-speech-synthesis-approach-based-on-transfer-learning-2102.05630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-cloning-a-multi-speaker-text-to-speech-synthesis-approach-based-on-transfer-learning-2102.05630"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-vq-vae-for-one-shot-music-style-transfer-2102.05749</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-vq-vae-for-one-shot-music-style-transfer-2102.05749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-vq-vae-for-one-shot-music-style-transfer-2102.05749"/></url>
<url><loc>https://scifaro.com/en/abs/onoma-to-wave-environmental-sound-synthesis-from-onomatopoeic-words-2102.05872</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/onoma-to-wave-environmental-sound-synthesis-from-onomatopoeic-words-2102.05872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/onoma-to-wave-environmental-sound-synthesis-from-onomatopoeic-words-2102.05872"/></url>
<url><loc>https://scifaro.com/en/abs/casa-based-speaker-identification-using-cascaded-gmm-cnn-classifier-in-noisy-and-emotional-talking-conditions-2102.05894</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/casa-based-speaker-identification-using-cascaded-gmm-cnn-classifier-in-noisy-and-emotional-talking-conditions-2102.05894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/casa-based-speaker-identification-using-cascaded-gmm-cnn-classifier-in-noisy-and-emotional-talking-conditions-2102.05894"/></url>
<url><loc>https://scifaro.com/en/abs/language-independent-emotion-quantification-using-non-linear-modelling-of-speech-2102.06003</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-independent-emotion-quantification-using-non-linear-modelling-of-speech-2102.06003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-independent-emotion-quantification-using-non-linear-modelling-of-speech-2102.06003"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-mixture-of-deep-experts-with-clean-clustering-pre-training-2102.06034</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-mixture-of-deep-experts-with-clean-clustering-pre-training-2102.06034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-mixture-of-deep-experts-with-clean-clustering-pre-training-2102.06034"/></url>
<url><loc>https://scifaro.com/en/abs/a-fractal-approach-to-characterize-emotions-in-audio-and-visual-domain-a-study-on-cross-modal-interaction-2102.06038</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fractal-approach-to-characterize-emotions-in-audio-and-visual-domain-a-study-on-cross-modal-interaction-2102.06038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fractal-approach-to-characterize-emotions-in-audio-and-visual-domain-a-study-on-cross-modal-interaction-2102.06038"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-based-learning-for-audio-object-extraction-2102.06142</loc><lastmod>2021-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-based-learning-for-audio-object-extraction-2102.06142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-based-learning-for-audio-object-extraction-2102.06142"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-view-approach-to-audio-visual-speaker-verification-2102.06291</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-view-approach-to-audio-visual-speaker-verification-2102.06291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-view-approach-to-audio-visual-speaker-verification-2102.06291"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-speech-emotion-recognition-2102.06357</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-speech-emotion-recognition-2102.06357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-unsupervised-learning-for-speech-emotion-recognition-2102.06357"/></url>
<url><loc>https://scifaro.com/en/abs/vara-tts-non-autoregressive-text-to-speech-synthesis-based-on-very-deep-vae-with-residual-attention-2102.06431</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vara-tts-non-autoregressive-text-to-speech-synthesis-based-on-very-deep-vae-with-residual-attention-2102.06431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vara-tts-non-autoregressive-text-to-speech-synthesis-based-on-very-deep-vae-with-residual-attention-2102.06431"/></url>
<url><loc>https://scifaro.com/en/abs/deep-sound-field-reconstruction-in-real-rooms-introducing-the-isobel-sound-field-dataset-2102.06455</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-sound-field-reconstruction-in-real-rooms-introducing-the-isobel-sound-field-dataset-2102.06455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-sound-field-reconstruction-in-real-rooms-introducing-the-isobel-sound-field-dataset-2102.06455"/></url>
<url><loc>https://scifaro.com/en/abs/content-aware-speaker-embeddings-for-speaker-diarisation-2102.06467</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-aware-speaker-embeddings-for-speaker-diarisation-2102.06467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-aware-speaker-embeddings-for-speaker-diarisation-2102.06467"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-and-recurrent-networks-for-polyphonic-instrument-classification-from-monophonic-raw-audio-waveforms-2102.06930</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-and-recurrent-networks-for-polyphonic-instrument-classification-from-monophonic-raw-audio-waveforms-2102.06930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-and-recurrent-networks-for-polyphonic-instrument-classification-from-monophonic-raw-audio-waveforms-2102.06930"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speech-enhancement-using-graph-neural-networks-2102.06934</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speech-enhancement-using-graph-neural-networks-2102.06934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speech-enhancement-using-graph-neural-networks-2102.06934"/></url>
<url><loc>https://scifaro.com/en/abs/parametric-optimization-of-violin-top-plates-using-machine-learning-2102.07133</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parametric-optimization-of-violin-top-plates-using-machine-learning-2102.07133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parametric-optimization-of-violin-top-plates-using-machine-learning-2102.07133"/></url>
<url><loc>https://scifaro.com/en/abs/thank-you-for-attention-a-survey-on-attention-based-artificial-neural-networks-for-automatic-speech-recognition-2102.07259</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thank-you-for-attention-a-survey-on-attention-based-artificial-neural-networks-for-automatic-speech-recognition-2102.07259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thank-you-for-attention-a-survey-on-attention-based-artificial-neural-networks-for-automatic-speech-recognition-2102.07259"/></url>
<url><loc>https://scifaro.com/en/abs/i-vector-based-within-speaker-voice-quality-identification-on-connected-speech-2102.07307</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-vector-based-within-speaker-voice-quality-identification-on-connected-speech-2102.07307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-vector-based-within-speaker-voice-quality-identification-on-connected-speech-2102.07307"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-with-machine-learning-a-systematic-review-2102.07820</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-with-machine-learning-a-systematic-review-2102.07820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-with-machine-learning-a-systematic-review-2102.07820"/></url>
<url><loc>https://scifaro.com/en/abs/voice-gender-scoring-and-independent-acoustic-characterization-of-perceived-masculinity-and-femininity-2102.07982</loc><lastmod>2022-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-gender-scoring-and-independent-acoustic-characterization-of-perceived-masculinity-and-femininity-2102.07982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-gender-scoring-and-independent-acoustic-characterization-of-perceived-masculinity-and-femininity-2102.07982"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-recognition-models-with-small-samples-for-air-traffic-control-systems-2102.08015</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-recognition-models-with-small-samples-for-air-traffic-control-systems-2102.08015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-recognition-models-with-small-samples-for-air-traffic-control-systems-2102.08015"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-for-few-shot-audio-classification-by-episodic-triplet-mining-2102.08074</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-for-few-shot-audio-classification-by-episodic-triplet-mining-2102.08074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-for-few-shot-audio-classification-by-episodic-triplet-mining-2102.08074"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-semi-supervised-deep-learning-algorithms-for-audio-classification-2102.08183</loc><lastmod>2023-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-semi-supervised-deep-learning-algorithms-for-audio-classification-2102.08183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-semi-supervised-deep-learning-algorithms-for-audio-classification-2102.08183"/></url>
<url><loc>https://scifaro.com/en/abs/end-2-end-covid-19-detection-from-breath-cough-audio-2102.08359</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-2-end-covid-19-detection-from-breath-cough-audio-2102.08359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-2-end-covid-19-detection-from-breath-cough-audio-2102.08359"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-recursive-least-square-filter-and-neural-network-based-residual-echo-suppression-for-the-aec-challenge-2102.08551</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-recursive-least-square-filter-and-neural-network-based-residual-echo-suppression-for-the-aec-challenge-2102.08551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-recursive-least-square-filter-and-neural-network-based-residual-echo-suppression-for-the-aec-challenge-2102.08551"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-voice-to-singing-style-transfer-2102.08575</loc><lastmod>2021-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-voice-to-singing-style-transfer-2102.08575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-voice-to-singing-style-transfer-2102.08575"/></url>
<url><loc>https://scifaro.com/en/abs/desed-fl-and-urban-fl-federated-learning-datasets-for-sound-event-detection-2102.08833</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/desed-fl-and-urban-fl-federated-learning-datasets-for-sound-event-detection-2102.08833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/desed-fl-and-urban-fl-federated-learning-datasets-for-sound-event-detection-2102.08833"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-audio-to-lyrics-alignment-from-polyphonic-music-recordings-2102.09202</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-audio-to-lyrics-alignment-from-polyphonic-music-recordings-2102.09202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-audio-to-lyrics-alignment-from-polyphonic-music-recordings-2102.09202"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-temporal-attention-network-for-singing-melody-extraction-2102.09763</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-temporal-attention-network-for-singing-melody-extraction-2102.09763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-temporal-attention-network-for-singing-melody-extraction-2102.09763"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-recurrent-neural-networks-for-conditional-melody-generation-with-long-term-structure-2102.09794</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-recurrent-neural-networks-for-conditional-melody-generation-with-long-term-structure-2102.09794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-recurrent-neural-networks-for-conditional-melody-generation-with-long-term-structure-2102.09794"/></url>
<url><loc>https://scifaro.com/en/abs/unit-selection-synthesis-based-data-augmentation-for-fixed-phrase-speaker-verification-2102.09817</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unit-selection-synthesis-based-data-augmentation-for-fixed-phrase-speaker-verification-2102.09817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unit-selection-synthesis-based-data-augmentation-for-fixed-phrase-speaker-verification-2102.09817"/></url>
<url><loc>https://scifaro.com/en/abs/aispeech-sjtu-accent-identification-system-for-the-accented-english-speech-recognition-challenge-2102.09828</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aispeech-sjtu-accent-identification-system-for-the-accented-english-speech-recognition-challenge-2102.09828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aispeech-sjtu-accent-identification-system-for-the-accented-english-speech-recognition-challenge-2102.09828"/></url>
<url><loc>https://scifaro.com/en/abs/catnet-music-source-separation-system-with-mix-audio-augmentation-2102.09966</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catnet-music-source-separation-system-with-mix-audio-augmentation-2102.09966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catnet-music-source-separation-system-with-mix-audio-augmentation-2102.09966"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-weakly-labelled-data-from-audioset-2102.09971</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-weakly-labelled-data-from-audioset-2102.09971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-weakly-labelled-data-from-audioset-2102.09971"/></url>
<url><loc>https://scifaro.com/en/abs/transmask-a-compact-and-fast-speech-separation-model-based-on-transformer-2102.09978</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transmask-a-compact-and-fast-speech-separation-model-based-on-transformer-2102.09978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transmask-a-compact-and-fast-speech-separation-model-based-on-transformer-2102.09978"/></url>
<url><loc>https://scifaro.com/en/abs/the-accented-english-speech-recognition-challenge-2020-open-datasets-tracks-baselines-results-and-methods-2102.10233</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-accented-english-speech-recognition-challenge-2020-open-datasets-tracks-baselines-results-and-methods-2102.10233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-accented-english-speech-recognition-challenge-2020-open-datasets-tracks-baselines-results-and-methods-2102.10233"/></url>
<url><loc>https://scifaro.com/en/abs/singer-identification-using-deep-timbre-feature-learning-with-knn-net-2102.10236</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singer-identification-using-deep-timbre-feature-learning-with-knn-net-2102.10236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singer-identification-using-deep-timbre-feature-learning-with-knn-net-2102.10236"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-mfccs-for-speaker-verification-2102.10322</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-mfccs-for-speaker-verification-2102.10322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-mfccs-for-speaker-verification-2102.10322"/></url>
<url><loc>https://scifaro.com/en/abs/anomaly-detection-in-audio-with-concept-drift-using-adaptive-huffman-coding-2102.10515</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomaly-detection-in-audio-with-concept-drift-using-adaptive-huffman-coding-2102.10515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomaly-detection-in-audio-with-concept-drift-using-adaptive-huffman-coding-2102.10515"/></url>
<url><loc>https://scifaro.com/en/abs/anyone-gan-sing-2102.11058</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anyone-gan-sing-2102.11058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anyone-gan-sing-2102.11058"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-deep-neural-structures-and-their-interpretability-in-the-domain-of-voice-conversion-2102.11420</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-deep-neural-structures-and-their-interpretability-in-the-domain-of-voice-conversion-2102.11420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-deep-neural-structures-and-their-interpretability-in-the-domain-of-voice-conversion-2102.11420"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-local-and-global-information-for-automated-audio-captioning-with-transfer-learning-2102.11457</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-local-and-global-information-for-automated-audio-captioning-with-transfer-learning-2102.11457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-local-and-global-information-for-automated-audio-captioning-with-transfer-learning-2102.11457"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-audio-grounding-building-correspondence-between-captions-and-sound-events-2102.11474</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-audio-grounding-building-correspondence-between-captions-and-sound-events-2102.11474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-audio-grounding-building-correspondence-between-captions-and-sound-events-2102.11474"/></url>
<url><loc>https://scifaro.com/en/abs/senone-aware-adversarial-multi-task-training-for-unsupervised-child-to-adult-speech-adaptation-2102.11488</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/senone-aware-adversarial-multi-task-training-for-unsupervised-child-to-adult-speech-adaptation-2102.11488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/senone-aware-adversarial-multi-task-training-for-unsupervised-child-to-adult-speech-adaptation-2102.11488"/></url>
<url><loc>https://scifaro.com/en/abs/memory-efficient-speech-recognition-on-smart-devices-2102.11531</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-efficient-speech-recognition-on-smart-devices-2102.11531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-efficient-speech-recognition-on-smart-devices-2102.11531"/></url>
<url><loc>https://scifaro.com/en/abs/data-fusion-for-audiovisual-speaker-localization-extending-dynamic-stream-weights-to-the-spatial-domain-2102.11588</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-fusion-for-audiovisual-speaker-localization-extending-dynamic-stream-weights-to-the-spatial-domain-2102.11588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-fusion-for-audiovisual-speaker-localization-extending-dynamic-stream-weights-to-the-spatial-domain-2102.11588"/></url>
<url><loc>https://scifaro.com/en/abs/improving-deep-learning-sound-events-classifiers-using-gram-matrix-feature-wise-correlations-2102.11771</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-deep-learning-sound-events-classifiers-using-gram-matrix-feature-wise-correlations-2102.11771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-deep-learning-sound-events-classifiers-using-gram-matrix-feature-wise-correlations-2102.11771"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-approach-for-singer-voice-classification-of-vietnamese-popular-music-2102.12111</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-approach-for-singer-voice-classification-of-vietnamese-popular-music-2102.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-approach-for-singer-voice-classification-of-vietnamese-popular-music-2102.12111"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-feature-extraction-for-heartbeat-anomaly-detection-2102.12289</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-feature-extraction-for-heartbeat-anomaly-detection-2102.12289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-feature-extraction-for-heartbeat-anomaly-detection-2102.12289"/></url>
<url><loc>https://scifaro.com/en/abs/triplet-loss-based-embeddings-for-forensic-speaker-identification-in-spanish-2102.12564</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/triplet-loss-based-embeddings-for-forensic-speaker-identification-in-spanish-2102.12564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/triplet-loss-based-embeddings-for-forensic-speaker-identification-in-spanish-2102.12564"/></url>
<url><loc>https://scifaro.com/en/abs/maskcyclegan-vc-learning-non-parallel-voice-conversion-with-filling-in-frames-2102.12841</loc><lastmod>2021-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maskcyclegan-vc-learning-non-parallel-voice-conversion-with-filling-in-frames-2102.12841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maskcyclegan-vc-learning-non-parallel-voice-conversion-with-filling-in-frames-2102.12841"/></url>
<url><loc>https://scifaro.com/en/abs/towards-explaining-expressive-qualities-in-piano-recordings-transfer-of-explanatory-features-via-acoustic-domain-adaptation-2102.13479</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-explaining-expressive-qualities-in-piano-recordings-transfer-of-explanatory-features-via-acoustic-domain-adaptation-2102.13479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-explaining-expressive-qualities-in-piano-recordings-transfer-of-explanatory-features-via-acoustic-domain-adaptation-2102.13479"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-system-for-the-2020-personalized-voice-trigger-challenge-2102.13552</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-system-for-the-2020-personalized-voice-trigger-challenge-2102.13552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-system-for-the-2020-personalized-voice-trigger-challenge-2102.13552"/></url>
<url><loc>https://scifaro.com/en/abs/mbnet-mos-prediction-for-synthesized-speech-with-mean-bias-network-2103.00110</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mbnet-mos-prediction-for-synthesized-speech-with-mean-bias-network-2103.00110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mbnet-mos-prediction-for-synthesized-speech-with-mean-bias-network-2103.00110"/></url>
<url><loc>https://scifaro.com/en/abs/expert-decision-support-system-for-aeroacoustic-source-type-identification-using-clustering-2103.00255</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expert-decision-support-system-for-aeroacoustic-source-type-identification-using-clustering-2103.00255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expert-decision-support-system-for-aeroacoustic-source-type-identification-using-clustering-2103.00255"/></url>
<url><loc>https://scifaro.com/en/abs/brain-signals-to-rescue-aphasia-apraxia-and-dysarthria-speech-recognition-2103.00383</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/brain-signals-to-rescue-aphasia-apraxia-and-dysarthria-speech-recognition-2103.00383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/brain-signals-to-rescue-aphasia-apraxia-and-dysarthria-speech-recognition-2103.00383"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-attention-based-sequence-to-sequence-architectures-for-sound-event-localization-2103.00417</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-attention-based-sequence-to-sequence-architectures-for-sound-event-localization-2103.00417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-attention-based-sequence-to-sequence-architectures-for-sound-event-localization-2103.00417"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-classification-of-voiced-speech-and-pitch-tracking-using-forward-backward-kalman-filtering-2103.01173</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-classification-of-voiced-speech-and-pitch-tracking-using-forward-backward-kalman-filtering-2103.01173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-classification-of-voiced-speech-and-pitch-tracking-using-forward-backward-kalman-filtering-2103.01173"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-separation-using-cross-modal-correspondence-loss-2103.01463</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-separation-using-cross-modal-correspondence-loss-2103.01463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-separation-using-cross-modal-correspondence-loss-2103.01463"/></url>
<url><loc>https://scifaro.com/en/abs/open-range-pitch-tracking-for-carrier-frequency-difference-estimation-from-hf-transmitted-speech-2103.01599</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-range-pitch-tracking-for-carrier-frequency-difference-estimation-from-hf-transmitted-speech-2103.01599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-range-pitch-tracking-for-carrier-frequency-difference-estimation-from-hf-transmitted-speech-2103.01599"/></url>
<url><loc>https://scifaro.com/en/abs/virufy-a-multi-branch-deep-learning-network-for-automated-detection-of-covid-19-2103.01806</loc><lastmod>2021-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/virufy-a-multi-branch-deep-learning-network-for-automated-detection-of-covid-19-2103.01806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/virufy-a-multi-branch-deep-learning-network-for-automated-detection-of-covid-19-2103.01806"/></url>
<url><loc>https://scifaro.com/en/abs/audio-scene-monitoring-using-redundant-ad-hoc-microphone-array-networks-2103.01830</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-scene-monitoring-using-redundant-ad-hoc-microphone-array-networks-2103.01830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-scene-monitoring-using-redundant-ad-hoc-microphone-array-networks-2103.01830"/></url>
<url><loc>https://scifaro.com/en/abs/listen-read-and-identify-multimodal-singing-language-identification-of-music-2103.01893</loc><lastmod>2021-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-read-and-identify-multimodal-singing-language-identification-of-music-2103.01893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-read-and-identify-multimodal-singing-language-identification-of-music-2103.01893"/></url>
<url><loc>https://scifaro.com/en/abs/investigations-on-audiovisual-emotion-recognition-in-noisy-conditions-2103.01894</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigations-on-audiovisual-emotion-recognition-in-noisy-conditions-2103.01894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigations-on-audiovisual-emotion-recognition-in-noisy-conditions-2103.01894"/></url>
<url><loc>https://scifaro.com/en/abs/soundclr-contrastive-learning-of-representations-for-improved-environmental-sound-classification-2103.01929</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundclr-contrastive-learning-of-representations-for-improved-environmental-sound-classification-2103.01929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundclr-contrastive-learning-of-representations-for-improved-environmental-sound-classification-2103.01929"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-separation-with-ad-hoc-microphone-arrays-2103.02378</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-separation-with-ad-hoc-microphone-arrays-2103.02378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-separation-with-ad-hoc-microphone-arrays-2103.02378"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-audio-and-music-classification-2103.02420</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-audio-and-music-classification-2103.02420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-audio-and-music-classification-2103.02420"/></url>
<url><loc>https://scifaro.com/en/abs/compute-and-memory-efficient-universal-sound-source-separation-2103.02644</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compute-and-memory-efficient-universal-sound-source-separation-2103.02644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compute-and-memory-efficient-universal-sound-source-separation-2103.02644"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-semantic-information-2103.02993</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-semantic-information-2103.02993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-semantic-information-2103.02993"/></url>
<url><loc>https://scifaro.com/en/abs/error-driven-fixed-budget-asr-personalization-for-accented-speakers-2103.03142</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/error-driven-fixed-budget-asr-personalization-for-accented-speakers-2103.03142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/error-driven-fixed-budget-asr-personalization-for-accented-speakers-2103.03142"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-classification-on-the-edge-a-pipeline-for-deep-acoustic-networks-on-extremely-resource-constrained-devices-2103.03483</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-classification-on-the-edge-a-pipeline-for-deep-acoustic-networks-on-extremely-resource-constrained-devices-2103.03483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-classification-on-the-edge-a-pipeline-for-deep-acoustic-networks-on-extremely-resource-constrained-devices-2103.03483"/></url>
<url><loc>https://scifaro.com/en/abs/slow-fast-auditory-streams-for-audio-recognition-2103.03516</loc><lastmod>2021-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slow-fast-auditory-streams-for-audio-recognition-2103.03516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slow-fast-auditory-streams-for-audio-recognition-2103.03516"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-speech-synthesis-a-brief-literature-review-2103.03927</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-speech-synthesis-a-brief-literature-review-2103.03927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-speech-synthesis-a-brief-literature-review-2103.03927"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-assessment-of-controllability-of-an-expressive-deep-learning-based-tts-system-2103.04097</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-assessment-of-controllability-of-an-expressive-deep-learning-based-tts-system-2103.04097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-assessment-of-controllability-of-an-expressive-deep-learning-based-tts-system-2103.04097"/></url>
<url><loc>https://scifaro.com/en/abs/gan-vocoder-multi-resolution-discriminator-is-all-you-need-2103.05236</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gan-vocoder-multi-resolution-discriminator-is-all-you-need-2103.05236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gan-vocoder-multi-resolution-discriminator-is-all-you-need-2103.05236"/></url>
<url><loc>https://scifaro.com/en/abs/spheroidal-ambisonics-a-spatial-audio-framework-using-spheroidal-bases-2103.05719</loc><lastmod>2023-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spheroidal-ambisonics-a-spatial-audio-framework-using-spheroidal-bases-2103.05719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spheroidal-ambisonics-a-spatial-audio-framework-using-spheroidal-bases-2103.05719"/></url>
<url><loc>https://scifaro.com/en/abs/search-disaster-victims-using-sound-source-localization-2103.06049</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/search-disaster-victims-using-sound-source-localization-2103.06049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/search-disaster-victims-using-sound-source-localization-2103.06049"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speaker-independent-dysarthric-speech-intelligibility-assessment-system-2103.06157</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speaker-independent-dysarthric-speech-intelligibility-assessment-system-2103.06157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speaker-independent-dysarthric-speech-intelligibility-assessment-system-2103.06157"/></url>
<url><loc>https://scifaro.com/en/abs/multi-format-contrastive-learning-of-audio-representations-2103.06508</loc><lastmod>2021-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-format-contrastive-learning-of-audio-representations-2103.06508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-format-contrastive-learning-of-audio-representations-2103.06508"/></url>
<url><loc>https://scifaro.com/en/abs/topological-data-analysis-of-korean-music-in-jeongganbo-a-cycle-structure-2103.06620</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topological-data-analysis-of-korean-music-in-jeongganbo-a-cycle-structure-2103.06620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topological-data-analysis-of-korean-music-in-jeongganbo-a-cycle-structure-2103.06620"/></url>
<url><loc>https://scifaro.com/en/abs/learning-spectro-temporal-representations-of-complex-sounds-with-parameterized-neural-networks-2103.07125</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-spectro-temporal-representations-of-complex-sounds-with-parameterized-neural-networks-2103.07125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-spectro-temporal-representations-of-complex-sounds-with-parameterized-neural-networks-2103.07125"/></url>
<url><loc>https://scifaro.com/en/abs/latent-space-explorations-of-singing-voice-synthesis-using-ddsp-2103.07197</loc><lastmod>2021-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-space-explorations-of-singing-voice-synthesis-using-ddsp-2103.07197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-space-explorations-of-singing-voice-synthesis-using-ddsp-2103.07197"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-timbre-transfer-and-sound-synthesis-using-ddsp-2103.07220</loc><lastmod>2021-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-timbre-transfer-and-sound-synthesis-using-ddsp-2103.07220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-timbre-transfer-and-sound-synthesis-using-ddsp-2103.07220"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-animal-biodiversity-using-acoustic-monitoring-and-deep-learning-2103.07276</loc><lastmod>2021-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-animal-biodiversity-using-acoustic-monitoring-and-deep-learning-2103.07276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-animal-biodiversity-using-acoustic-monitoring-and-deep-learning-2103.07276"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-embedding-calibration-for-symbolic-music-similarity-2103.07656</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-embedding-calibration-for-symbolic-music-similarity-2103.07656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-embedding-calibration-for-symbolic-music-similarity-2103.07656"/></url>
<url><loc>https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-and-speech-transmission-index-using-mtf-based-cnns-2103.07904</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-and-speech-transmission-index-using-mtf-based-cnns-2103.07904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-and-speech-transmission-index-using-mtf-based-cnns-2103.07904"/></url>
<url><loc>https://scifaro.com/en/abs/multi-discriminator-sobolev-defense-gan-against-adversarial-attacks-for-end-to-end-speech-systems-2103.08086</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-discriminator-sobolev-defense-gan-against-adversarial-attacks-for-end-to-end-speech-systems-2103.08086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-discriminator-sobolev-defense-gan-against-adversarial-attacks-for-end-to-end-speech-systems-2103.08086"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-speech-to-text-adversarial-attack-2103.08095</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-speech-to-text-adversarial-attack-2103.08095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-speech-to-text-adversarial-attack-2103.08095"/></url>
<url><loc>https://scifaro.com/en/abs/computational-timbre-and-tonal-system-similarity-analysis-of-the-music-of-northern-myanmar-based-kachin-compared-to-xinjiang-based-uyghur-ethnic-groups-2103.08203</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computational-timbre-and-tonal-system-similarity-analysis-of-the-music-of-northern-myanmar-based-kachin-compared-to-xinjiang-based-uyghur-ethnic-groups-2103.08203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computational-timbre-and-tonal-system-similarity-analysis-of-the-music-of-northern-myanmar-based-kachin-compared-to-xinjiang-based-uyghur-ethnic-groups-2103.08203"/></url>
<url><loc>https://scifaro.com/en/abs/emonet-a-transfer-learning-framework-for-multi-corpus-speech-emotion-recognition-2103.08310</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emonet-a-transfer-learning-framework-for-multi-corpus-speech-emotion-recognition-2103.08310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emonet-a-transfer-learning-framework-for-multi-corpus-speech-emotion-recognition-2103.08310"/></url>
<url><loc>https://scifaro.com/en/abs/dhasp-differentiable-hearing-aid-speech-processing-2103.08569</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dhasp-differentiable-hearing-aid-speech-processing-2103.08569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dhasp-differentiable-hearing-aid-speech-processing-2103.08569"/></url>
<url><loc>https://scifaro.com/en/abs/fast-development-of-asr-in-african-languages-using-self-supervised-speech-representation-learning-2103.08993</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-development-of-asr-in-african-languages-using-self-supervised-speech-representation-learning-2103.08993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-development-of-asr-in-african-languages-using-self-supervised-speech-representation-learning-2103.08993"/></url>
<url><loc>https://scifaro.com/en/abs/an-asynchronous-wfst-based-decoder-for-automatic-speech-recognition-2103.09063</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-asynchronous-wfst-based-decoder-for-automatic-speech-recognition-2103.09063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-asynchronous-wfst-based-decoder-for-automatic-speech-recognition-2103.09063"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-of-musical-representations-2103.09410</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-of-musical-representations-2103.09410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-of-musical-representations-2103.09410"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-permutations-with-differentiable-ranking-2103.09879</loc><lastmod>2021-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-permutations-with-differentiable-ranking-2103.09879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-permutations-with-differentiable-ranking-2103.09879"/></url>
<url><loc>https://scifaro.com/en/abs/audio-description-from-image-by-modal-translation-network-2103.10018</loc><lastmod>2021-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-description-from-image-by-modal-translation-network-2103.10018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-description-from-image-by-modal-translation-network-2103.10018"/></url>
<url><loc>https://scifaro.com/en/abs/ustc-nelslip-system-description-for-dihard-iii-challenge-2103.10661</loc><lastmod>2021-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ustc-nelslip-system-description-for-dihard-iii-challenge-2103.10661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ustc-nelslip-system-description-for-dihard-iii-challenge-2103.10661"/></url>
<url><loc>https://scifaro.com/en/abs/reduced-basis-methods-for-numerical-room-acoustic-simulations-with-parametrized-boundaries-2103.11730</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduced-basis-methods-for-numerical-room-acoustic-simulations-with-parametrized-boundaries-2103.11730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduced-basis-methods-for-numerical-room-acoustic-simulations-with-parametrized-boundaries-2103.11730"/></url>
<url><loc>https://scifaro.com/en/abs/self-paced-ensemble-learning-for-speech-and-audio-classification-2103.11988</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-paced-ensemble-learning-for-speech-and-audio-classification-2103.11988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-paced-ensemble-learning-for-speech-and-audio-classification-2103.11988"/></url>
<url><loc>https://scifaro.com/en/abs/musical-mix-clarity-predication-using-decomposition-and-perceptual-masking-thresholds-2103.12152</loc><lastmod>2021-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-mix-clarity-predication-using-decomposition-and-perceptual-masking-thresholds-2103.12152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-mix-clarity-predication-using-decomposition-and-perceptual-masking-thresholds-2103.12152"/></url>
<url><loc>https://scifaro.com/en/abs/tiny-transformers-for-environmental-sound-classification-at-the-edge-2103.12157</loc><lastmod>2021-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiny-transformers-for-environmental-sound-classification-at-the-edge-2103.12157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiny-transformers-for-environmental-sound-classification-at-the-edge-2103.12157"/></url>
<url><loc>https://scifaro.com/en/abs/gise-51-a-scalable-isolated-sound-events-dataset-2103.12306</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gise-51-a-scalable-isolated-sound-events-dataset-2103.12306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gise-51-a-scalable-isolated-sound-events-dataset-2103.12306"/></url>
<url><loc>https://scifaro.com/en/abs/learned-complex-masks-for-multi-instrument-source-separation-2103.12864</loc><lastmod>2021-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learned-complex-masks-for-multi-instrument-source-separation-2103.12864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learned-complex-masks-for-multi-instrument-source-separation-2103.12864"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-for-piano-sustain-pedal-detection-2103.13219</loc><lastmod>2021-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-for-piano-sustain-pedal-detection-2103.13219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-for-piano-sustain-pedal-detection-2103.13219"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-cough-classification-for-tuberculosis-screening-in-a-real-world-environment-2103.13300</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-cough-classification-for-tuberculosis-screening-in-a-real-world-environment-2103.13300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-cough-classification-for-tuberculosis-screening-in-a-real-world-environment-2103.13300"/></url>
<url><loc>https://scifaro.com/en/abs/blind-speech-separation-and-dereverberation-using-neural-beamforming-2103.13443</loc><lastmod>2021-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-speech-separation-and-dereverberation-using-neural-beamforming-2103.13443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-speech-separation-and-dereverberation-using-neural-beamforming-2103.13443"/></url>
<url><loc>https://scifaro.com/en/abs/subspectral-normalization-for-neural-audio-data-processing-2103.13620</loc><lastmod>2021-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspectral-normalization-for-neural-audio-data-processing-2103.13620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspectral-normalization-for-neural-audio-data-processing-2103.13620"/></url>
<url><loc>https://scifaro.com/en/abs/image2reverb-cross-modal-reverb-impulse-response-synthesis-2103.14201</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/image2reverb-cross-modal-reverb-impulse-response-synthesis-2103.14201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/image2reverb-cross-modal-reverb-impulse-response-synthesis-2103.14201"/></url>
<url><loc>https://scifaro.com/en/abs/three-dimensional-higher-order-raypath-separation-in-a-shallow-water-waveguide-2103.14206</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/three-dimensional-higher-order-raypath-separation-in-a-shallow-water-waveguide-2103.14206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/three-dimensional-higher-order-raypath-separation-in-a-shallow-water-waveguide-2103.14206"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-the-compatibility-of-stem-tracks-to-generate-music-mashups-2103.14208</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-the-compatibility-of-stem-tracks-to-generate-music-mashups-2103.14208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-the-compatibility-of-stem-tracks-to-generate-music-mashups-2103.14208"/></url>
<url><loc>https://scifaro.com/en/abs/subspace-based-compressive-sensing-algorithm-for-raypath-separation-in-a-shallow-water-waveguide-2103.14236</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspace-based-compressive-sensing-algorithm-for-raypath-separation-in-a-shallow-water-waveguide-2103.14236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspace-based-compressive-sensing-algorithm-for-raypath-separation-in-a-shallow-water-waveguide-2103.14236"/></url>
<url><loc>https://scifaro.com/en/abs/improve-gan-based-neural-vocoder-using-pointwise-relativistic-leastsquare-gan-2103.14245</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improve-gan-based-neural-vocoder-using-pointwise-relativistic-leastsquare-gan-2103.14245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improve-gan-based-neural-vocoder-using-pointwise-relativistic-leastsquare-gan-2103.14245"/></url>
<url><loc>https://scifaro.com/en/abs/guided-training-a-simple-method-for-single-channel-speaker-separation-2103.14330</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-training-a-simple-method-for-single-channel-speaker-separation-2103.14330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-training-a-simple-method-for-single-channel-speaker-separation-2103.14330"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-tacotron-2-a-non-autoregressive-neural-tts-model-with-differentiable-duration-modeling-2103.14574</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-tacotron-2-a-non-autoregressive-neural-tts-model-with-differentiable-duration-modeling-2103.14574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-tacotron-2-a-non-autoregressive-neural-tts-model-with-differentiable-duration-modeling-2103.14574"/></url>
<url><loc>https://scifaro.com/en/abs/cyclic-defense-gan-against-speech-adversarial-attacks-2103.14717</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclic-defense-gan-against-speech-adversarial-attacks-2103.14717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclic-defense-gan-against-speech-adversarial-attacks-2103.14717"/></url>
<url><loc>https://scifaro.com/en/abs/construction-of-a-large-scale-japanese-asr-corpus-on-tv-recordings-2103.14736</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/construction-of-a-large-scale-japanese-asr-corpus-on-tv-recordings-2103.14736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/construction-of-a-large-scale-japanese-asr-corpus-on-tv-recordings-2103.14736"/></url>
<url><loc>https://scifaro.com/en/abs/on-tasnet-for-low-latency-single-speaker-speech-enhancement-2103.14882</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-tasnet-for-low-latency-single-speaker-speech-enhancement-2103.14882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-tasnet-for-low-latency-single-speaker-speech-enhancement-2103.14882"/></url>
<url><loc>https://scifaro.com/en/abs/feature-based-representation-for-violin-bridge-admittances-2103.14895</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-based-representation-for-violin-bridge-admittances-2103.14895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-based-representation-for-violin-bridge-admittances-2103.14895"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-residual-gaussian-based-self-attention-2103.15722</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-residual-gaussian-based-self-attention-2103.15722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-end-to-end-speech-recognition-with-residual-gaussian-based-self-attention-2103.15722"/></url>
<url><loc>https://scifaro.com/en/abs/audio-classification-of-the-content-of-food-containers-and-drinking-glasses-2103.15999</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-classification-of-the-content-of-food-containers-and-drinking-glasses-2103.15999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-classification-of-the-content-of-food-containers-and-drinking-glasses-2103.15999"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-analysis-with-mixup-based-multitask-learning-and-cross-task-fusion-2103.16079</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-analysis-with-mixup-based-multitask-learning-and-cross-task-fusion-2103.16079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-analysis-with-mixup-based-multitask-learning-and-cross-task-fusion-2103.16079"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-generation-with-diffusion-models-2103.16091</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-generation-with-diffusion-models-2103.16091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-generation-with-diffusion-models-2103.16091"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speech-enhancement-with-generative-adversarial-learning-2103.16149</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-with-generative-adversarial-learning-2103.16149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-with-generative-adversarial-learning-2103.16149"/></url>
<url><loc>https://scifaro.com/en/abs/ts-rir-translated-synthetic-room-impulse-responses-for-speech-augmentation-2103.16804</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ts-rir-translated-synthetic-room-impulse-responses-for-speech-augmentation-2103.16804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ts-rir-translated-synthetic-room-impulse-responses-for-speech-augmentation-2103.16804"/></url>
<url><loc>https://scifaro.com/en/abs/near-field-acoustic-holography-on-arbitrary-shapes-using-convolutional-neural-network-2103.16935</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/near-field-acoustic-holography-on-arbitrary-shapes-using-convolutional-neural-network-2103.16935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/near-field-acoustic-holography-on-arbitrary-shapes-using-convolutional-neural-network-2103.16935"/></url>
<url><loc>https://scifaro.com/en/abs/towards-citizen-science-for-smart-cities-a-framework-for-a-collaborative-game-of-bird-call-recognition-based-on-internet-of-sound-practices-2103.16988</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-citizen-science-for-smart-cities-a-framework-for-a-collaborative-game-of-bird-call-recognition-based-on-internet-of-sound-practices-2103.16988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-citizen-science-for-smart-cities-a-framework-for-a-collaborative-game-of-bird-call-recognition-based-on-internet-of-sound-practices-2103.16988"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-enhanced-speech-emotion-communication-using-deep-learning-aided-edge-computing-2103.17139</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-enhanced-speech-emotion-communication-using-deep-learning-aided-edge-computing-2103.17139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-enhanced-speech-emotion-communication-using-deep-learning-aided-edge-computing-2103.17139"/></url>
<url><loc>https://scifaro.com/en/abs/speech-resynthesis-from-discrete-disentangled-self-supervised-representations-2104.00355</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-resynthesis-from-discrete-disentangled-self-supervised-representations-2104.00355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-resynthesis-from-discrete-disentangled-self-supervised-representations-2104.00355"/></url>
<url><loc>https://scifaro.com/en/abs/enriched-music-representations-with-multiple-cross-modal-contrastive-learning-2104.00437</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enriched-music-representations-with-multiple-cross-modal-contrastive-learning-2104.00437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enriched-music-representations-with-multiple-cross-modal-contrastive-learning-2104.00437"/></url>
<url><loc>https://scifaro.com/en/abs/auto-kws-2021-challenge-task-datasets-and-baselines-2104.00513</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-kws-2021-challenge-task-datasets-and-baselines-2104.00513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-kws-2021-challenge-task-datasets-and-baselines-2104.00513"/></url>
<url><loc>https://scifaro.com/en/abs/outliernets-highly-compact-deep-autoencoder-network-architectures-for-on-device-acoustic-anomaly-detection-2104.00528</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/outliernets-highly-compact-deep-autoencoder-network-architectures-for-on-device-acoustic-anomaly-detection-2104.00528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/outliernets-highly-compact-deep-autoencoder-network-architectures-for-on-device-acoustic-anomaly-detection-2104.00528"/></url>
<url><loc>https://scifaro.com/en/abs/multi-rate-attention-architecture-for-fast-streamable-text-to-speech-spectrum-modeling-2104.00705</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-rate-attention-architecture-for-fast-streamable-text-to-speech-spectrum-modeling-2104.00705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-rate-attention-architecture-for-fast-streamable-text-to-speech-spectrum-modeling-2104.00705"/></url>
<url><loc>https://scifaro.com/en/abs/out-of-a-hundred-trials-how-many-errors-does-your-speaker-verifier-make-2104.00732</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/out-of-a-hundred-trials-how-many-errors-does-your-speaker-verifier-make-2104.00732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/out-of-a-hundred-trials-how-many-errors-does-your-speaker-verifier-make-2104.00732"/></url>
<url><loc>https://scifaro.com/en/abs/robust-wav2vec-2-0-analyzing-domain-shift-in-self-supervised-pre-training-2104.01027</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-wav2vec-2-0-analyzing-domain-shift-in-self-supervised-pre-training-2104.01027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-wav2vec-2-0-analyzing-domain-shift-in-self-supervised-pre-training-2104.01027"/></url>
<url><loc>https://scifaro.com/en/abs/phyaug-physics-directed-data-augmentation-for-deep-sensing-model-transfer-in-cyber-physical-systems-2104.01160</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phyaug-physics-directed-data-augmentation-for-deep-sensing-model-transfer-in-cyber-physical-systems-2104.01160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phyaug-physics-directed-data-augmentation-for-deep-sensing-model-transfer-in-cyber-physical-systems-2104.01160"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-based-deep-learning-framework-for-bbc-television-programme-classification-2104.01161</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-based-deep-learning-framework-for-bbc-television-programme-classification-2104.01161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-based-deep-learning-framework-for-bbc-television-programme-classification-2104.01161"/></url>
<url><loc>https://scifaro.com/en/abs/pate-aae-incorporating-adversarial-autoencoder-into-private-aggregation-of-teacher-ensembles-for-spoken-command-classification-2104.01271</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pate-aae-incorporating-adversarial-autoencoder-into-private-aggregation-of-teacher-ensembles-for-spoken-command-classification-2104.01271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pate-aae-incorporating-adversarial-autoencoder-into-private-aggregation-of-teacher-ensembles-for-spoken-command-classification-2104.01271"/></url>
<url><loc>https://scifaro.com/en/abs/diarization-of-legal-proceedings-identifying-and-transcribing-judicial-speech-from-recorded-court-audio-2104.01304</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diarization-of-legal-proceedings-identifying-and-transcribing-judicial-speech-from-recorded-court-audio-2104.01304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diarization-of-legal-proceedings-identifying-and-transcribing-judicial-speech-from-recorded-court-audio-2104.01304"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-of-orthogonal-sequences-made-from-extended-time-stretched-pulses-enables-measurement-of-involuntary-voice-fundamental-frequency-response-to-pitch-perturbation-2104.01444</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-of-orthogonal-sequences-made-from-extended-time-stretched-pulses-enables-measurement-of-involuntary-voice-fundamental-frequency-response-to-pitch-perturbation-2104.01444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-of-orthogonal-sequences-made-from-extended-time-stretched-pulses-enables-measurement-of-involuntary-voice-fundamental-frequency-response-to-pitch-perturbation-2104.01444"/></url>
<url><loc>https://scifaro.com/en/abs/ast-audio-spectrogram-transformer-2104.01778</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ast-audio-spectrogram-transformer-2104.01778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ast-audio-spectrogram-transformer-2104.01778"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-based-emotional-voice-conversion-for-japanese-phrases-2104.01807</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-based-emotional-voice-conversion-for-japanese-phrases-2104.01807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-based-emotional-voice-conversion-for-japanese-phrases-2104.01807"/></url>
<url><loc>https://scifaro.com/en/abs/acted-vs-improvised-domain-adaptation-for-elicitation-approaches-in-audio-visual-emotion-recognition-2104.01978</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acted-vs-improvised-domain-adaptation-for-elicitation-approaches-in-audio-visual-emotion-recognition-2104.01978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acted-vs-improvised-domain-adaptation-for-elicitation-approaches-in-audio-visual-emotion-recognition-2104.01978"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-aware-covid-19-detection-from-imbalanced-sound-data-2104.02005</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-aware-covid-19-detection-from-imbalanced-sound-data-2104.02005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-aware-covid-19-detection-from-imbalanced-sound-data-2104.02005"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-multi-talker-speech-recognition-with-joint-speaker-identification-2104.02109</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-multi-talker-speech-recognition-with-joint-speaker-identification-2104.02109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-multi-talker-speech-recognition-with-joint-speaker-identification-2104.02109"/></url>
<url><loc>https://scifaro.com/en/abs/dissecting-user-perceived-latency-of-on-device-e2e-speech-recognition-2104.02207</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dissecting-user-perceived-latency-of-on-device-e2e-speech-recognition-2104.02207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dissecting-user-perceived-latency-of-on-device-e2e-speech-recognition-2104.02207"/></url>
<url><loc>https://scifaro.com/en/abs/flexi-transducer-optimizing-latency-accuracy-and-compute-formulti-domain-on-device-scenarios-2104.02232</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexi-transducer-optimizing-latency-accuracy-and-compute-formulti-domain-on-device-scenarios-2104.02232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexi-transducer-optimizing-latency-accuracy-and-compute-formulti-domain-on-device-scenarios-2104.02232"/></url>
<url><loc>https://scifaro.com/en/abs/binary-neural-network-for-speaker-verification-2104.02306</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binary-neural-network-for-speaker-verification-2104.02306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binary-neural-network-for-speaker-verification-2104.02306"/></url>
<url><loc>https://scifaro.com/en/abs/muslcat-multi-scale-multi-level-convolutional-attention-transformer-for-discriminative-music-modeling-on-raw-waveforms-2104.02309</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muslcat-multi-scale-multi-level-convolutional-attention-transformer-for-discriminative-music-modeling-on-raw-waveforms-2104.02309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muslcat-multi-scale-multi-level-convolutional-attention-transformer-for-discriminative-music-modeling-on-raw-waveforms-2104.02309"/></url>
<url><loc>https://scifaro.com/en/abs/towards-consistent-hybrid-hmm-acoustic-modeling-2104.02387</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-consistent-hybrid-hmm-acoustic-modeling-2104.02387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-consistent-hybrid-hmm-acoustic-modeling-2104.02387"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-detection-in-cough-breath-and-speech-using-deep-transfer-learning-and-bottleneck-features-2104.02477</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-detection-in-cough-breath-and-speech-using-deep-transfer-learning-and-bottleneck-features-2104.02477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-detection-in-cough-breath-and-speech-using-deep-transfer-learning-and-bottleneck-features-2104.02477"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-transport-based-adaptation-in-dysarthric-speech-tasks-2104.02535</loc><lastmod>2022-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-transport-based-adaptation-in-dysarthric-speech-tasks-2104.02535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-transport-based-adaptation-in-dysarthric-speech-tasks-2104.02535"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-ctc-and-lfmmi-for-out-of-domain-adaptation-of-wav2vec-2-0-acoustic-model-2104.02558</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-ctc-and-lfmmi-for-out-of-domain-adaptation-of-wav2vec-2-0-acoustic-model-2104.02558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-ctc-and-lfmmi-for-out-of-domain-adaptation-of-wav2vec-2-0-acoustic-model-2104.02558"/></url>
<url><loc>https://scifaro.com/en/abs/darts-conformer-towards-efficient-gradient-based-neural-architecture-search-for-end-to-end-asr-2104.02868</loc><lastmod>2021-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/darts-conformer-towards-efficient-gradient-based-neural-architecture-search-for-end-to-end-asr-2104.02868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/darts-conformer-towards-efficient-gradient-based-neural-architecture-search-for-end-to-end-asr-2104.02868"/></url>
<url><loc>https://scifaro.com/en/abs/learning-robust-speech-representation-with-an-articulatory-regularized-variational-autoencoder-2104.03204</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-robust-speech-representation-with-an-articulatory-regularized-variational-autoencoder-2104.03204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-robust-speech-representation-with-an-articulatory-regularized-variational-autoencoder-2104.03204"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-from-speech-using-wav2vec-2-0-embeddings-2104.03502</loc><lastmod>2021-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-using-wav2vec-2-0-embeddings-2104.03502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-from-speech-using-wav2vec-2-0-embeddings-2104.03502"/></url>
<url><loc>https://scifaro.com/en/abs/towards-multi-scale-style-control-for-expressive-speech-synthesis-2104.03521</loc><lastmod>2021-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-multi-scale-style-control-for-expressive-speech-synthesis-2104.03521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-multi-scale-style-control-for-expressive-speech-synthesis-2104.03521"/></url>
<url><loc>https://scifaro.com/en/abs/metricgan-an-improved-version-of-metricgan-for-speech-enhancement-2104.03538</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metricgan-an-improved-version-of-metricgan-for-speech-enhancement-2104.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metricgan-an-improved-version-of-metricgan-for-speech-enhancement-2104.03538"/></url>
<url><loc>https://scifaro.com/en/abs/wnars-wfst-based-non-autoregressive-streaming-end-to-end-speech-recognition-2104.03587</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wnars-wfst-based-non-autoregressive-streaming-end-to-end-speech-recognition-2104.03587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wnars-wfst-based-non-autoregressive-streaming-end-to-end-speech-recognition-2104.03587"/></url>
<url><loc>https://scifaro.com/en/abs/aishell-4-an-open-source-dataset-for-speech-enhancement-separation-recognition-and-speaker-diarization-in-conference-scenario-2104.03603</loc><lastmod>2021-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aishell-4-an-open-source-dataset-for-speech-enhancement-separation-recognition-and-speaker-diarization-in-conference-scenario-2104.03603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aishell-4-an-open-source-dataset-for-speech-enhancement-separation-recognition-and-speaker-diarization-in-conference-scenario-2104.03603"/></url>
<url><loc>https://scifaro.com/en/abs/half-truth-a-partially-fake-audio-detection-dataset-2104.03617</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/half-truth-a-partially-fake-audio-detection-dataset-2104.03617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/half-truth-a-partially-fake-audio-detection-dataset-2104.03617"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-without-clean-training-data-a-noise2noise-approach-2104.03838</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-without-clean-training-data-a-noise2noise-approach-2104.03838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-without-clean-training-data-a-noise2noise-approach-2104.03838"/></url>
<url><loc>https://scifaro.com/en/abs/serumrnn-step-by-step-audio-vst-effect-programming-2104.03876</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serumrnn-step-by-step-audio-vst-effect-programming-2104.03876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serumrnn-step-by-step-audio-vst-effect-programming-2104.03876"/></url>
<url><loc>https://scifaro.com/en/abs/flavored-tacotron-conditional-learning-for-prosodic-linguistic-features-2104.04050</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flavored-tacotron-conditional-learning-for-prosodic-linguistic-features-2104.04050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flavored-tacotron-conditional-learning-for-prosodic-linguistic-features-2104.04050"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-spoofing-detection-inspired-from-audio-generation-artifacts-2104.04111</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-spoofing-detection-inspired-from-audio-generation-artifacts-2104.04111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-spoofing-detection-inspired-from-audio-generation-artifacts-2104.04111"/></url>
<url><loc>https://scifaro.com/en/abs/heaps-law-and-vocabulary-richness-in-the-history-of-classical-music-harmony-2104.04143</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heaps-law-and-vocabulary-richness-in-the-history-of-classical-music-harmony-2104.04143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heaps-law-and-vocabulary-richness-in-the-history-of-classical-music-harmony-2104.04143"/></url>
<url><loc>https://scifaro.com/en/abs/joint-online-multichannel-acoustic-echo-cancellation-speech-dereverberation-and-source-separation-2104.04325</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-online-multichannel-acoustic-echo-cancellation-speech-dereverberation-and-source-separation-2104.04325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-online-multichannel-acoustic-echo-cancellation-speech-dereverberation-and-source-separation-2104.04325"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-learning-for-audio-visual-video-parsing-2104.04598</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-learning-for-audio-visual-video-parsing-2104.04598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-learning-for-audio-visual-video-parsing-2104.04598"/></url>
<url><loc>https://scifaro.com/en/abs/unified-source-filter-gan-unified-source-filter-network-based-on-factorization-of-quasi-periodic-parallel-wavegan-2104.04668</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-source-filter-gan-unified-source-filter-network-based-on-factorization-of-quasi-periodic-parallel-wavegan-2104.04668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-source-filter-gan-unified-source-filter-network-based-on-factorization-of-quasi-periodic-parallel-wavegan-2104.04668"/></url>
<url><loc>https://scifaro.com/en/abs/boundary-and-context-aware-training-for-cif-based-non-autoregressive-end-to-end-asr-2104.04702</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boundary-and-context-aware-training-for-cif-based-non-autoregressive-end-to-end-asr-2104.04702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boundary-and-context-aware-training-for-cif-based-non-autoregressive-end-to-end-asr-2104.04702"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-mandarin-tone-classification-with-short-term-context-information-2104.05657</loc><lastmod>2021-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-mandarin-tone-classification-with-short-term-context-information-2104.05657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-mandarin-tone-classification-with-short-term-context-information-2104.05657"/></url>
<url><loc>https://scifaro.com/en/abs/extremely-low-footprint-end-to-end-asr-system-for-smart-device-2104.05784</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extremely-low-footprint-end-to-end-asr-system-for-smart-device-2104.05784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extremely-low-footprint-end-to-end-asr-system-for-smart-device-2104.05784"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-escalation-level-from-speech-with-transfer-learning-and-acoustic-lexical-information-fusion-2104.06004</loc><lastmod>2021-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-escalation-level-from-speech-with-transfer-learning-and-acoustic-lexical-information-fusion-2104.06004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-escalation-level-from-speech-with-transfer-learning-and-acoustic-lexical-information-fusion-2104.06004"/></url>
<url><loc>https://scifaro.com/en/abs/noisevc-towards-high-quality-zero-shot-voice-conversion-2104.06074</loc><lastmod>2021-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisevc-towards-high-quality-zero-shot-voice-conversion-2104.06074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisevc-towards-high-quality-zero-shot-voice-conversion-2104.06074"/></url>
<url><loc>https://scifaro.com/en/abs/visually-informed-binaural-audio-generation-without-binaural-audios-2104.06162</loc><lastmod>2021-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-informed-binaural-audio-generation-without-binaural-audios-2104.06162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-informed-binaural-audio-generation-without-binaural-audios-2104.06162"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-and-analysis-of-deep-audio-embeddings-for-music-emotion-recognition-2104.06517</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-and-analysis-of-deep-audio-embeddings-for-music-emotion-recognition-2104.06517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-and-analysis-of-deep-audio-embeddings-for-music-emotion-recognition-2104.06517"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-the-onsets-and-frames-model-with-additive-attention-2104.06607</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-the-onsets-and-frames-model-with-additive-attention-2104.06607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-the-onsets-and-frames-model-with-additive-attention-2104.06607"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-keyword-spotting-using-neural-architecture-search-and-quantization-2104.06666</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-keyword-spotting-using-neural-architecture-search-and-quantization-2104.06666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-keyword-spotting-using-neural-architecture-search-and-quantization-2104.06666"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-sequence-to-sequence-voice-conversion-2104.06793</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-sequence-to-sequence-voice-conversion-2104.06793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-sequence-to-sequence-voice-conversion-2104.06793"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-conformer-based-speech-recognition-with-linear-attention-2104.06865</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-conformer-based-speech-recognition-with-linear-attention-2104.06865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-conformer-based-speech-recognition-with-linear-attention-2104.06865"/></url>
<url><loc>https://scifaro.com/en/abs/fasts2s-vc-streaming-non-autoregressive-sequence-to-sequence-voice-conversion-2104.06900</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fasts2s-vc-streaming-non-autoregressive-sequence-to-sequence-voice-conversion-2104.06900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fasts2s-vc-streaming-non-autoregressive-sequence-to-sequence-voice-conversion-2104.06900"/></url>
<url><loc>https://scifaro.com/en/abs/audio-feature-ranking-for-sound-based-covid-19-patient-detection-2104.07128</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-feature-ranking-for-sound-based-covid-19-patient-detection-2104.07128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-feature-ranking-for-sound-based-covid-19-patient-detection-2104.07128"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-design-of-deep-priors-for-unsupervised-audio-restoration-2104.07161</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-design-of-deep-priors-for-unsupervised-audio-restoration-2104.07161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-design-of-deep-priors-for-unsupervised-audio-restoration-2104.07161"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-fake-audio-detection-2104.07286</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-fake-audio-detection-2104.07286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-fake-audio-detection-2104.07286"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-speech-recognition-with-unsupervised-character-level-distribution-matching-2104.07491</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-speech-recognition-with-unsupervised-character-level-distribution-matching-2104.07491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-speech-recognition-with-unsupervised-character-level-distribution-matching-2104.07491"/></url>
<url><loc>https://scifaro.com/en/abs/spectrogram-inpainting-for-interactive-generation-of-instrument-sounds-2104.07519</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrogram-inpainting-for-interactive-generation-of-instrument-sounds-2104.07519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrogram-inpainting-for-interactive-generation-of-instrument-sounds-2104.07519"/></url>
<url><loc>https://scifaro.com/en/abs/mimo-self-attentive-rnn-beamformer-for-multi-speaker-speech-separation-2104.08450</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimo-self-attentive-rnn-beamformer-for-multi-speaker-speech-separation-2104.08450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimo-self-attentive-rnn-beamformer-for-multi-speaker-speech-separation-2104.08450"/></url>
<url><loc>https://scifaro.com/en/abs/uncovering-audio-patterns-in-music-with-nonnegative-tucker-decomposition-for-structural-segmentation-2104.08580</loc><lastmod>2022-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncovering-audio-patterns-in-music-with-nonnegative-tucker-decomposition-for-structural-segmentation-2104.08580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncovering-audio-patterns-in-music-with-nonnegative-tucker-decomposition-for-structural-segmentation-2104.08580"/></url>
<url><loc>https://scifaro.com/en/abs/cetacean-translation-initiative-a-roadmap-to-deciphering-the-communication-of-sperm-whales-2104.08614</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cetacean-translation-initiative-a-roadmap-to-deciphering-the-communication-of-sperm-whales-2104.08614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cetacean-translation-initiative-a-roadmap-to-deciphering-the-communication-of-sperm-whales-2104.08614"/></url>
<url><loc>https://scifaro.com/en/abs/best-practices-for-noise-based-augmentation-to-improve-the-performance-of-deployable-speech-based-emotion-recognition-systems-2104.08806</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/best-practices-for-noise-based-augmentation-to-improve-the-performance-of-deployable-speech-based-emotion-recognition-systems-2104.08806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/best-practices-for-noise-based-augmentation-to-improve-the-performance-of-deployable-speech-based-emotion-recognition-systems-2104.08806"/></url>
<url><loc>https://scifaro.com/en/abs/low-frequency-characterization-of-music-sounds-ultra-bass-richness-from-the-sound-wave-beats-2104.08872</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-frequency-characterization-of-music-sounds-ultra-bass-richness-from-the-sound-wave-beats-2104.08872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-frequency-characterization-of-music-sounds-ultra-bass-richness-from-the-sound-wave-beats-2104.08872"/></url>
<url><loc>https://scifaro.com/en/abs/many-speakers-single-channel-speech-separation-with-optimal-permutation-training-2104.08955</loc><lastmod>2021-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-speakers-single-channel-speech-separation-with-optimal-permutation-training-2104.08955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-speakers-single-channel-speech-separation-with-optimal-permutation-training-2104.08955"/></url>
<url><loc>https://scifaro.com/en/abs/an-interdisciplinary-review-of-music-performance-analysis-2104.09018</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-interdisciplinary-review-of-music-performance-analysis-2104.09018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-interdisciplinary-review-of-music-performance-analysis-2104.09018"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-of-generative-cnns-trained-on-waveforms-2104.09489</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-of-generative-cnns-trained-on-waveforms-2104.09489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-of-generative-cnns-trained-on-waveforms-2104.09489"/></url>
<url><loc>https://scifaro.com/en/abs/adaspeech-2-adaptive-text-to-speech-with-untranscribed-data-2104.09715</loc><lastmod>2021-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaspeech-2-adaptive-text-to-speech-with-untranscribed-data-2104.09715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaspeech-2-adaptive-text-to-speech-with-untranscribed-data-2104.09715"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-phasicity-prediction-from-arterial-sounds-through-spectrogram-analysis-using-convolutional-neural-networks-for-limb-perfusion-assessment-2104.09748</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-phasicity-prediction-from-arterial-sounds-through-spectrogram-analysis-using-convolutional-neural-networks-for-limb-perfusion-assessment-2104.09748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-phasicity-prediction-from-arterial-sounds-through-spectrogram-analysis-using-convolutional-neural-networks-for-limb-perfusion-assessment-2104.09748"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-fake-stereo-audio-2104.09832</loc><lastmod>2021-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-fake-stereo-audio-2104.09832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-fake-stereo-audio-2104.09832"/></url>
<url><loc>https://scifaro.com/en/abs/a-cappella-audio-visual-singing-voice-separation-2104.09946</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cappella-audio-visual-singing-voice-separation-2104.09946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cappella-audio-visual-singing-voice-separation-2104.09946"/></url>
<url><loc>https://scifaro.com/en/abs/review-of-end-to-end-speech-synthesis-technology-based-on-deep-learning-2104.09995</loc><lastmod>2021-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/review-of-end-to-end-speech-synthesis-technology-based-on-deep-learning-2104.09995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/review-of-end-to-end-speech-synthesis-technology-based-on-deep-learning-2104.09995"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-impact-of-word-error-rate-on-acoustic-linguistic-speech-emotion-recognition-an-update-for-the-deep-learning-era-2104.10121</loc><lastmod>2021-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-impact-of-word-error-rate-on-acoustic-linguistic-speech-emotion-recognition-an-update-for-the-deep-learning-era-2104.10121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-impact-of-word-error-rate-on-acoustic-linguistic-speech-emotion-recognition-an-update-for-the-deep-learning-era-2104.10121"/></url>
<url><loc>https://scifaro.com/en/abs/room-adaptive-conditioning-method-for-sound-event-classification-in-reverberant-environments-2104.10431</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-adaptive-conditioning-method-for-sound-event-classification-in-reverberant-environments-2104.10431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-adaptive-conditioning-method-for-sound-event-classification-in-reverberant-environments-2104.10431"/></url>
<url><loc>https://scifaro.com/en/abs/protecting-gender-and-identity-with-disentangled-speech-representations-2104.11051</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/protecting-gender-and-identity-with-disentangled-speech-representations-2104.11051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/protecting-gender-and-identity-with-disentangled-speech-representations-2104.11051"/></url>
<url><loc>https://scifaro.com/en/abs/restoring-degraded-speech-via-a-modified-diffusion-model-2104.11347</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/restoring-degraded-speech-via-a-modified-diffusion-model-2104.11347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/restoring-degraded-speech-via-a-modified-diffusion-model-2104.11347"/></url>
<url><loc>https://scifaro.com/en/abs/infant-vocal-tract-development-analysis-and-diagnosis-by-cry-signals-with-cnn-age-classification-2104.11395</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infant-vocal-tract-development-analysis-and-diagnosis-by-cry-signals-with-cnn-age-classification-2104.11395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infant-vocal-tract-development-analysis-and-diagnosis-by-cry-signals-with-cnn-age-classification-2104.11395"/></url>
<url><loc>https://scifaro.com/en/abs/3d-convolutional-neural-networks-for-ultrasound-based-silent-speech-interfaces-2104.11532</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-convolutional-neural-networks-for-ultrasound-based-silent-speech-interfaces-2104.11532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-convolutional-neural-networks-for-ultrasound-based-silent-speech-interfaces-2104.11532"/></url>
<url><loc>https://scifaro.com/en/abs/esresne-x-t-fbsp-learning-robust-time-frequency-transformation-of-audio-2104.11587</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/esresne-x-t-fbsp-learning-robust-time-frequency-transformation-of-audio-2104.11587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/esresne-x-t-fbsp-learning-robust-time-frequency-transformation-of-audio-2104.11587"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-speech-from-real-time-articulatory-mri-using-neural-vocoders-2104.11598</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-speech-from-real-time-articulatory-mri-using-neural-vocoders-2104.11598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-speech-from-real-time-articulatory-mri-using-neural-vocoders-2104.11598"/></url>
<url><loc>https://scifaro.com/en/abs/improving-neural-silent-speech-interface-models-by-adversarial-training-2104.11601</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-neural-silent-speech-interface-models-by-adversarial-training-2104.11601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-neural-silent-speech-interface-models-by-adversarial-training-2104.11601"/></url>
<url><loc>https://scifaro.com/en/abs/deepspectrumlite-a-power-efficient-transfer-learning-framework-for-embedded-speech-and-audio-processing-from-decentralised-data-2104.11629</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepspectrumlite-a-power-efficient-transfer-learning-framework-for-embedded-speech-and-audio-processing-from-decentralised-data-2104.11629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepspectrumlite-a-power-efficient-transfer-learning-framework-for-embedded-speech-and-audio-processing-from-decentralised-data-2104.11629"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-assessment-of-synthetic-speech-naturalness-2104.11673</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-assessment-of-synthetic-speech-naturalness-2104.11673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-assessment-of-synthetic-speech-naturalness-2104.11673"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-voice-activity-detection-hybrid-audio-segmentation-for-direct-speech-translation-2104.11710</loc><lastmod>2021-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-voice-activity-detection-hybrid-audio-segmentation-for-direct-speech-translation-2104.11710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-voice-activity-detection-hybrid-audio-segmentation-for-direct-speech-translation-2104.11710"/></url>
<url><loc>https://scifaro.com/en/abs/music-embedding-a-tool-for-incorporating-music-theory-into-computational-music-applications-2104.11880</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-embedding-a-tool-for-incorporating-music-theory-into-computational-music-applications-2104.11880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-embedding-a-tool-for-incorporating-music-theory-into-computational-music-applications-2104.11880"/></url>
<url><loc>https://scifaro.com/en/abs/muscaps-generating-captions-for-music-audio-2104.11984</loc><lastmod>2021-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muscaps-generating-captions-for-music-audio-2104.11984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muscaps-generating-captions-for-music-audio-2104.11984"/></url>
<url><loc>https://scifaro.com/en/abs/an-adaptive-learning-based-generative-adversarial-network-for-one-to-one-voice-conversion-2104.12159</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adaptive-learning-based-generative-adversarial-network-for-one-to-one-voice-conversion-2104.12159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adaptive-learning-based-generative-adversarial-network-for-one-to-one-voice-conversion-2104.12159"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-synthesis-techniques-for-midi-to-audio-synthesis-2104.12292</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-synthesis-techniques-for-midi-to-audio-synthesis-2104.12292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-synthesis-techniques-for-midi-to-audio-synthesis-2104.12292"/></url>
<url><loc>https://scifaro.com/en/abs/complex-neural-spatial-filter-enhancing-multi-channel-target-speech-separation-in-complex-domain-2104.12359</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-neural-spatial-filter-enhancing-multi-channel-target-speech-separation-in-complex-domain-2104.12359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-neural-spatial-filter-enhancing-multi-channel-target-speech-separation-in-complex-domain-2104.12359"/></url>
<url><loc>https://scifaro.com/en/abs/generation-of-musical-patterns-through-operads-2104.12432</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generation-of-musical-patterns-through-operads-2104.12432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generation-of-musical-patterns-through-operads-2104.12432"/></url>
<url><loc>https://scifaro.com/en/abs/points2sound-from-mono-to-binaural-audio-using-3d-point-cloud-scenes-2104.12462</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/points2sound-from-mono-to-binaural-audio-using-3d-point-cloud-scenes-2104.12462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/points2sound-from-mono-to-binaural-audio-using-3d-point-cloud-scenes-2104.12462"/></url>
<url><loc>https://scifaro.com/en/abs/identifying-actions-for-sound-event-classification-2104.12693</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identifying-actions-for-sound-event-classification-2104.12693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identifying-actions-for-sound-event-classification-2104.12693"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-self-supervised-learning-of-general-audio-representations-2104.12807</loc><lastmod>2021-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-self-supervised-learning-of-general-audio-representations-2104.12807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-self-supervised-learning-of-general-audio-representations-2104.12807"/></url>
<url><loc>https://scifaro.com/en/abs/one-billion-audio-sounds-from-gpu-enabled-modular-synthesis-2104.12922</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-billion-audio-sounds-from-gpu-enabled-modular-synthesis-2104.12922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-billion-audio-sounds-from-gpu-enabled-modular-synthesis-2104.12922"/></url>
<url><loc>https://scifaro.com/en/abs/dpt-fsnet-dual-path-transformer-based-full-band-and-sub-band-fusion-network-for-speech-enhancement-2104.13002</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpt-fsnet-dual-path-transformer-based-full-band-and-sub-band-fusion-network-for-speech-enhancement-2104.13002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpt-fsnet-dual-path-transformer-based-full-band-and-sub-band-fusion-network-for-speech-enhancement-2104.13002"/></url>
<url><loc>https://scifaro.com/en/abs/the-music-box-operad-random-generation-of-musical-phrases-from-patterns-2104.13040</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-music-box-operad-random-generation-of-musical-phrases-from-patterns-2104.13040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-music-box-operad-random-generation-of-musical-phrases-from-patterns-2104.13040"/></url>
<url><loc>https://scifaro.com/en/abs/generating-lead-sheets-with-affect-a-novel-conditional-seq2seq-framework-2104.13056</loc><lastmod>2021-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-lead-sheets-with-affect-a-novel-conditional-seq2seq-framework-2104.13056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-lead-sheets-with-affect-a-novel-conditional-seq2seq-framework-2104.13056"/></url>
<url><loc>https://scifaro.com/en/abs/batebit-controller-popularizing-digital-musical-instruments-development-process-2104.13266</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/batebit-controller-popularizing-digital-musical-instruments-development-process-2104.13266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/batebit-controller-popularizing-digital-musical-instruments-development-process-2104.13266"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-analysis-informed-content-estimation-and-audio-source-separation-2104.13276</loc><lastmod>2021-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-analysis-informed-content-estimation-and-audio-source-separation-2104.13276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-analysis-informed-content-estimation-and-audio-source-separation-2104.13276"/></url>
<url><loc>https://scifaro.com/en/abs/improving-fairness-in-speaker-recognition-2104.14067</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-fairness-in-speaker-recognition-2104.14067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-fairness-in-speaker-recognition-2104.14067"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-from-federated-acoustic-models-2104.14297</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-from-federated-acoustic-models-2104.14297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-from-federated-acoustic-models-2104.14297"/></url>
<url><loc>https://scifaro.com/en/abs/star-dgt-a-robust-gabor-transform-for-speech-denoising-2104.14468</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/star-dgt-a-robust-gabor-transform-for-speech-denoising-2104.14468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/star-dgt-a-robust-gabor-transform-for-speech-denoising-2104.14468"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-of-the-singing-voice-toward-a-real-time-analysis-tool-for-singers-2105.00173</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-of-the-singing-voice-toward-a-real-time-analysis-tool-for-singers-2105.00173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-of-the-singing-voice-toward-a-real-time-analysis-tool-for-singers-2105.00173"/></url>
<url><loc>https://scifaro.com/en/abs/audio-transformers-2105.00335</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-transformers-2105.00335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-transformers-2105.00335"/></url>
<url><loc>https://scifaro.com/en/abs/avatr-one-shot-speaker-extraction-with-transformers-2105.00609</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avatr-one-shot-speaker-extraction-with-transformers-2105.00609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avatr-one-shot-speaker-extraction-with-transformers-2105.00609"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-audio-visual-consistency-with-partial-supervision-for-spatial-audio-generation-2105.00708</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-audio-visual-consistency-with-partial-supervision-for-spatial-audio-generation-2105.00708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-audio-visual-consistency-with-partial-supervision-for-spatial-audio-generation-2105.00708"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-for-musical-instrument-recognition-using-mfccs-2105.00933</loc><lastmod>2026-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-for-musical-instrument-recognition-using-mfccs-2105.00933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-for-musical-instrument-recognition-using-mfccs-2105.00933"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-end-to-end-speech-recognition-with-jointly-trained-neural-feature-enhancement-2105.01254</loc><lastmod>2021-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-end-to-end-speech-recognition-with-jointly-trained-neural-feature-enhancement-2105.01254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-end-to-end-speech-recognition-with-jointly-trained-neural-feature-enhancement-2105.01254"/></url>
<url><loc>https://scifaro.com/en/abs/vqcpc-gan-variable-length-adversarial-audio-synthesis-using-vector-quantized-contrastive-predictive-coding-2105.01531</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vqcpc-gan-variable-length-adversarial-audio-synthesis-using-vector-quantized-contrastive-predictive-coding-2105.01531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vqcpc-gan-variable-length-adversarial-audio-synthesis-using-vector-quantized-contrastive-predictive-coding-2105.01531"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-multichannel-observation-with-partially-missing-channels-2105.01836</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-multichannel-observation-with-partially-missing-channels-2105.01836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-multichannel-observation-with-partially-missing-channels-2105.01836"/></url>
<url><loc>https://scifaro.com/en/abs/improved-feature-extraction-for-crnn-based-multiple-sound-source-localization-2105.01897</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-feature-extraction-for-crnn-based-multiple-sound-source-localization-2105.01897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-feature-extraction-for-crnn-based-multiple-sound-source-localization-2105.01897"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-diarization-for-variable-number-of-speakers-with-local-global-networks-and-discriminative-speaker-embeddings-2105.02096</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-diarization-for-variable-number-of-speakers-with-local-global-networks-and-discriminative-speaker-embeddings-2105.02096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-diarization-for-variable-number-of-speakers-with-local-global-networks-and-discriminative-speaker-embeddings-2105.02096"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-from-automatically-separated-sound-scenes-2105.02132</loc><lastmod>2021-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-from-automatically-separated-sound-scenes-2105.02132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-from-automatically-separated-sound-scenes-2105.02132"/></url>
<url><loc>https://scifaro.com/en/abs/how-do-voices-from-past-speech-synthesis-challenges-compare-today-2105.02373</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-do-voices-from-past-speech-synthesis-challenges-compare-today-2105.02373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-do-voices-from-past-speech-synthesis-challenges-compare-today-2105.02373"/></url>
<url><loc>https://scifaro.com/en/abs/dbnet-a-dual-branch-network-architecture-processing-on-spectrum-and-waveform-for-single-channel-speech-enhancement-2105.02436</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dbnet-a-dual-branch-network-architecture-processing-on-spectrum-and-waveform-for-single-channel-speech-enhancement-2105.02436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dbnet-a-dual-branch-network-architecture-processing-on-spectrum-and-waveform-for-single-channel-speech-enhancement-2105.02436"/></url>
<url><loc>https://scifaro.com/en/abs/deficient-basis-estimation-of-noise-spatial-covariance-matrix-for-rank-constrained-spatial-covariance-matrix-estimation-method-in-blind-speech-extraction-2105.02491</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deficient-basis-estimation-of-noise-spatial-covariance-matrix-for-rank-constrained-spatial-covariance-matrix-estimation-method-in-blind-speech-extraction-2105.02491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deficient-basis-estimation-of-noise-spatial-covariance-matrix-for-rank-constrained-spatial-covariance-matrix-estimation-method-in-blind-speech-extraction-2105.02491"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-separable-polling-attention-and-global-layer-normalization-followed-with-prelu-2105.02509</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-separable-polling-attention-and-global-layer-normalization-followed-with-prelu-2105.02509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-separable-polling-attention-and-global-layer-normalization-followed-with-prelu-2105.02509"/></url>
<url><loc>https://scifaro.com/en/abs/mimii-due-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-with-domain-shifts-due-to-changes-in-operational-and-environmental-conditions-2105.02702</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimii-due-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-with-domain-shifts-due-to-changes-in-operational-and-environmental-conditions-2105.02702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimii-due-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-with-domain-shifts-due-to-changes-in-operational-and-environmental-conditions-2105.02702"/></url>
<url><loc>https://scifaro.com/en/abs/speechmoe-scaling-to-large-acoustic-models-with-dynamic-routing-mixture-of-experts-2105.03036</loc><lastmod>2021-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechmoe-scaling-to-large-acoustic-models-with-dynamic-routing-mixture-of-experts-2105.03036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechmoe-scaling-to-large-acoustic-models-with-dynamic-routing-mixture-of-experts-2105.03036"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-a-data-driven-approach-using-teacher-student-training-2105.04065</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-a-data-driven-approach-using-teacher-student-training-2105.04065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-in-the-wild-a-data-driven-approach-using-teacher-student-training-2105.04065"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-frequency-independent-audio-source-separation-using-convolution-layer-based-on-impulse-invariant-method-2105.04079</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-frequency-independent-audio-source-separation-using-convolution-layer-based-on-impulse-invariant-method-2105.04079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-frequency-independent-audio-source-separation-using-convolution-layer-based-on-impulse-invariant-method-2105.04079"/></url>
<url><loc>https://scifaro.com/en/abs/musemorphose-full-song-and-fine-grained-piano-music-style-transfer-with-one-transformer-vae-2105.04090</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musemorphose-full-song-and-fine-grained-piano-music-style-transfer-with-one-transformer-vae-2105.04090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musemorphose-full-song-and-fine-grained-piano-music-style-transfer-with-one-transformer-vae-2105.04090"/></url>
<url><loc>https://scifaro.com/en/abs/mass-multi-task-anthropomorphic-speech-synthesis-framework-2105.04124</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mass-multi-task-anthropomorphic-speech-synthesis-framework-2105.04124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mass-multi-task-anthropomorphic-speech-synthesis-framework-2105.04124"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-conditional-bounding-box-regression-for-music-score-following-2105.04309</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-conditional-bounding-box-regression-for-music-score-following-2105.04309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-conditional-bounding-box-regression-for-music-score-following-2105.04309"/></url>
<url><loc>https://scifaro.com/en/abs/learning-robust-latent-representations-for-controllable-speech-synthesis-2105.04458</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-robust-latent-representations-for-controllable-speech-synthesis-2105.04458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-robust-latent-representations-for-controllable-speech-synthesis-2105.04458"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-to-audio-based-navigation-in-a-multi-speaker-environment-2105.04488</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-to-audio-based-navigation-in-a-multi-speaker-environment-2105.04488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-to-audio-based-navigation-in-a-multi-speaker-environment-2105.04488"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-popular-music-generation-using-imitation-and-structure-2105.04709</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-popular-music-generation-using-imitation-and-structure-2105.04709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-popular-music-generation-using-imitation-and-structure-2105.04709"/></url>
<url><loc>https://scifaro.com/en/abs/separate-but-together-unsupervised-federated-learning-for-speech-enhancement-from-non-iid-data-2105.04727</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-but-together-unsupervised-federated-learning-for-speech-enhancement-from-non-iid-data-2105.04727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-but-together-unsupervised-federated-learning-for-speech-enhancement-from-non-iid-data-2105.04727"/></url>
<url><loc>https://scifaro.com/en/abs/a-statistical-model-for-melody-reduction-2105.05385</loc><lastmod>2021-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-statistical-model-for-melody-reduction-2105.05385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-statistical-model-for-melody-reduction-2105.05385"/></url>
<url><loc>https://scifaro.com/en/abs/global-structure-aware-drum-transcription-based-on-self-attention-mechanisms-2105.05791</loc><lastmod>2021-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-structure-aware-drum-transcription-based-on-self-attention-mechanisms-2105.05791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-structure-aware-drum-transcription-based-on-self-attention-mechanisms-2105.05791"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-the-additional-features-on-the-performance-of-regression-analysis-a-case-study-on-regression-analysis-of-music-signal-2105.05938</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-the-additional-features-on-the-performance-of-regression-analysis-a-case-study-on-regression-analysis-of-music-signal-2105.05938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-the-additional-features-on-the-performance-of-regression-analysis-a-case-study-on-regression-analysis-of-music-signal-2105.05938"/></url>
<url><loc>https://scifaro.com/en/abs/multi-target-doa-estimation-with-an-audio-visual-fusion-mechanism-2105.06107</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-target-doa-estimation-with-an-audio-visual-fusion-mechanism-2105.06107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-target-doa-estimation-with-an-audio-visual-fusion-mechanism-2105.06107"/></url>
<url><loc>https://scifaro.com/en/abs/audio-captioning-with-composition-of-acoustic-and-semantic-information-2105.06355</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-captioning-with-composition-of-acoustic-and-semantic-information-2105.06355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-captioning-with-composition-of-acoustic-and-semantic-information-2105.06355"/></url>
<url><loc>https://scifaro.com/en/abs/chord-recognition-music-and-audio-information-retrieval-2105.07019</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-recognition-music-and-audio-information-retrieval-2105.07019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-recognition-music-and-audio-information-retrieval-2105.07019"/></url>
<url><loc>https://scifaro.com/en/abs/the-benefit-of-temporally-strong-labels-in-audio-event-classification-2105.07031</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-benefit-of-temporally-strong-labels-in-audio-event-classification-2105.07031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-benefit-of-temporally-strong-labels-in-audio-event-classification-2105.07031"/></url>
<url><loc>https://scifaro.com/en/abs/1d-cnn-architectures-for-music-genre-classification-2105.07302</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1d-cnn-architectures-for-music-genre-classification-2105.07302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1d-cnn-architectures-for-music-genre-classification-2105.07302"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-self-supervised-representation-ensembles-for-covid-19-cough-classification-2105.07566</loc><lastmod>2021-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-self-supervised-representation-ensembles-for-covid-19-cough-classification-2105.07566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-self-supervised-representation-ensembles-for-covid-19-cough-classification-2105.07566"/></url>
<url><loc>https://scifaro.com/en/abs/it-otts-and-it-owave-linear-stochastic-differential-equation-is-all-you-need-for-audio-generation-2105.07583</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/it-otts-and-it-owave-linear-stochastic-differential-equation-is-all-you-need-for-audio-generation-2105.07583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/it-otts-and-it-owave-linear-stochastic-differential-equation-is-all-you-need-for-audio-generation-2105.07583"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-with-adaptive-frequency-selection-2105.07596</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-with-adaptive-frequency-selection-2105.07596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-with-adaptive-frequency-selection-2105.07596"/></url>
<url><loc>https://scifaro.com/en/abs/point-based-acoustic-scattering-for-interactive-sound-propagation-via-surface-encoding-2105.08177</loc><lastmod>2021-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/point-based-acoustic-scattering-for-interactive-sound-propagation-via-surface-encoding-2105.08177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/point-based-acoustic-scattering-for-interactive-sound-propagation-via-surface-encoding-2105.08177"/></url>
<url><loc>https://scifaro.com/en/abs/federated-learning-with-highly-imbalanced-audio-data-2105.08550</loc><lastmod>2021-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-learning-with-highly-imbalanced-audio-data-2105.08550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-learning-with-highly-imbalanced-audio-data-2105.08550"/></url>
<url><loc>https://scifaro.com/en/abs/semour-a-scripted-emotional-speech-repository-for-urdu-2105.08957</loc><lastmod>2021-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semour-a-scripted-emotional-speech-repository-for-urdu-2105.08957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semour-a-scripted-emotional-speech-repository-for-urdu-2105.08957"/></url>
<url><loc>https://scifaro.com/en/abs/attack-on-practical-speaker-verification-system-using-universal-adversarial-perturbations-2105.09022</loc><lastmod>2021-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attack-on-practical-speaker-verification-system-using-universal-adversarial-perturbations-2105.09022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attack-on-practical-speaker-verification-system-using-universal-adversarial-perturbations-2105.09022"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-using-three-layered-lstm-2105.09046</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-using-three-layered-lstm-2105.09046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-using-three-layered-lstm-2105.09046"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-discriminative-learning-of-sounds-for-audio-event-classification-2105.09279</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-discriminative-learning-of-sounds-for-audio-event-classification-2105.09279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-discriminative-learning-of-sounds-for-audio-event-classification-2105.09279"/></url>
<url><loc>https://scifaro.com/en/abs/speech-song-emotion-recognition-using-multilayer-perceptron-and-standard-vector-machine-2105.09406</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-song-emotion-recognition-using-multilayer-perceptron-and-standard-vector-machine-2105.09406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-song-emotion-recognition-using-multilayer-perceptron-and-standard-vector-machine-2105.09406"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-and-low-latency-universal-neural-vocoder-based-on-multiband-wavernn-with-data-driven-linear-prediction-for-discrete-waveform-modeling-2105.09856</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-and-low-latency-universal-neural-vocoder-based-on-multiband-wavernn-with-data-driven-linear-prediction-for-discrete-waveform-modeling-2105.09856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-and-low-latency-universal-neural-vocoder-based-on-multiband-wavernn-with-data-driven-linear-prediction-for-discrete-waveform-modeling-2105.09856"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-real-time-non-parallel-voice-conversion-based-on-cyclic-variational-autoencoder-and-multiband-wavernn-with-data-driven-linear-prediction-2105.09858</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-real-time-non-parallel-voice-conversion-based-on-cyclic-variational-autoencoder-and-multiband-wavernn-with-data-driven-linear-prediction-2105.09858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-real-time-non-parallel-voice-conversion-based-on-cyclic-variational-autoencoder-and-multiband-wavernn-with-data-driven-linear-prediction-2105.09858"/></url>
<url><loc>https://scifaro.com/en/abs/mondegreen-a-post-processing-solution-to-speech-recognition-error-correction-for-voice-search-queries-2105.09930</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mondegreen-a-post-processing-solution-to-speech-recognition-error-correction-for-voice-search-queries-2105.09930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mondegreen-a-post-processing-solution-to-speech-recognition-error-correction-for-voice-search-queries-2105.09930"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-multi-target-domain-adaptation-for-acoustic-scene-classification-2105.10340</loc><lastmod>2021-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-multi-target-domain-adaptation-for-acoustic-scene-classification-2105.10340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-multi-target-domain-adaptation-for-acoustic-scene-classification-2105.10340"/></url>
<url><loc>https://scifaro.com/en/abs/loopnet-musical-loop-synthesis-conditioned-on-intuitive-musical-parameters-2105.10371</loc><lastmod>2021-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loopnet-musical-loop-synthesis-conditioned-on-intuitive-musical-parameters-2105.10371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loopnet-musical-loop-synthesis-conditioned-on-intuitive-musical-parameters-2105.10371"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-audio-representation-learning-for-modeling-beehive-strengths-2105.10536</loc><lastmod>2021-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-audio-representation-learning-for-modeling-beehive-strengths-2105.10536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-audio-representation-learning-for-modeling-beehive-strengths-2105.10536"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-detection-using-recorded-coughs-in-the-2021-dicova-challenge-2105.10619</loc><lastmod>2021-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-detection-using-recorded-coughs-in-the-2021-dicova-challenge-2105.10619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-detection-using-recorded-coughs-in-the-2021-dicova-challenge-2105.10619"/></url>
<url><loc>https://scifaro.com/en/abs/quanta-in-sound-the-sound-of-quanta-a-voice-informed-quantum-theoretical-perspective-on-sound-2105.10781</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quanta-in-sound-the-sound-of-quanta-a-voice-informed-quantum-theoretical-perspective-on-sound-2105.10781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quanta-in-sound-the-sound-of-quanta-a-voice-informed-quantum-theoretical-perspective-on-sound-2105.10781"/></url>
<url><loc>https://scifaro.com/en/abs/rnnoise-ex-hybrid-speech-enhancement-system-based-on-rnn-and-spectral-features-2105.11813</loc><lastmod>2021-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rnnoise-ex-hybrid-speech-enhancement-system-based-on-rnn-and-spectral-features-2105.11813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rnnoise-ex-hybrid-speech-enhancement-system-based-on-rnn-and-spectral-features-2105.11813"/></url>
<url><loc>https://scifaro.com/en/abs/a-modulation-front-end-for-music-audio-tagging-2105.11836</loc><lastmod>2021-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-modulation-front-end-for-music-audio-tagging-2105.11836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-modulation-front-end-for-music-audio-tagging-2105.11836"/></url>
<url><loc>https://scifaro.com/en/abs/spectrum-correction-acoustic-scene-classification-with-mismatched-recording-devices-2105.11856</loc><lastmod>2021-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrum-correction-acoustic-scene-classification-with-mismatched-recording-devices-2105.11856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrum-correction-acoustic-scene-classification-with-mismatched-recording-devices-2105.11856"/></url>
<url><loc>https://scifaro.com/en/abs/receptive-field-regularization-techniques-for-audio-classification-and-tagging-with-deep-convolutional-neural-networks-2105.12395</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/receptive-field-regularization-techniques-for-audio-classification-and-tagging-with-deep-convolutional-neural-networks-2105.12395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/receptive-field-regularization-techniques-for-audio-classification-and-tagging-with-deep-convolutional-neural-networks-2105.12395"/></url>
<url><loc>https://scifaro.com/en/abs/compensating-class-imbalance-for-acoustic-chimpanzee-detection-with-convolutional-recurrent-neural-networks-2105.12502</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compensating-class-imbalance-for-acoustic-chimpanzee-detection-with-convolutional-recurrent-neural-networks-2105.12502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compensating-class-imbalance-for-acoustic-chimpanzee-detection-with-convolutional-recurrent-neural-networks-2105.12502"/></url>
<url><loc>https://scifaro.com/en/abs/self-attending-rnn-for-speech-enhancement-to-improve-cross-corpus-generalization-2105.12831</loc><lastmod>2022-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attending-rnn-for-speech-enhancement-to-improve-cross-corpus-generalization-2105.12831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attending-rnn-for-speech-enhancement-to-improve-cross-corpus-generalization-2105.12831"/></url>
<url><loc>https://scifaro.com/en/abs/phone-level-prosody-modelling-with-gmm-based-mdn-for-diverse-and-controllable-speech-synthesis-2105.13086</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phone-level-prosody-modelling-with-gmm-based-mdn-for-diverse-and-controllable-speech-synthesis-2105.13086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phone-level-prosody-modelling-with-gmm-based-mdn-for-diverse-and-controllable-speech-synthesis-2105.13086"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-concept-drift-adaptation-for-acoustic-scene-classifier-based-on-kernel-density-drift-detection-and-combine-merge-gaussian-mixture-model-2105.13220</loc><lastmod>2021-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-concept-drift-adaptation-for-acoustic-scene-classifier-based-on-kernel-density-drift-detection-and-combine-merge-gaussian-mixture-model-2105.13220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-concept-drift-adaptation-for-acoustic-scene-classifier-based-on-kernel-density-drift-detection-and-combine-merge-gaussian-mixture-model-2105.13220"/></url>
<url><loc>https://scifaro.com/en/abs/cross-referencing-self-training-network-for-sound-event-detection-in-audio-mixtures-2105.13392</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-referencing-self-training-network-for-sound-event-detection-in-audio-mixtures-2105.13392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-referencing-self-training-network-for-sound-event-detection-in-audio-mixtures-2105.13392"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-for-ultrasound-based-silent-speech-interfaces-using-convolutional-neural-networks-2105.13718</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-for-ultrasound-based-silent-speech-interfaces-using-convolutional-neural-networks-2105.13718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-for-ultrasound-based-silent-speech-interfaces-using-convolutional-neural-networks-2105.13718"/></url>
<url><loc>https://scifaro.com/en/abs/dive-end-to-end-speech-diarization-via-iterative-speaker-embedding-2105.13802</loc><lastmod>2021-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dive-end-to-end-speech-diarization-via-iterative-speaker-embedding-2105.13802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dive-end-to-end-speech-diarization-via-iterative-speaker-embedding-2105.13802"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-artificial-reverberation-2105.13940</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-artificial-reverberation-2105.13940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-artificial-reverberation-2105.13940"/></url>
<url><loc>https://scifaro.com/en/abs/echofilter-end-to-end-neural-network-for-acoustic-echo-cancellation-2105.14666</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echofilter-end-to-end-neural-network-for-acoustic-echo-cancellation-2105.14666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echofilter-end-to-end-neural-network-for-acoustic-echo-cancellation-2105.14666"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-temporal-convolution-network-for-classroom-voice-detection-2105.14717</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-temporal-convolution-network-for-classroom-voice-detection-2105.14717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-temporal-convolution-network-for-classroom-voice-detection-2105.14717"/></url>
<url><loc>https://scifaro.com/en/abs/noise-classification-aided-attention-based-neural-network-for-monaural-speech-enhancement-2105.14719</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-classification-aided-attention-based-neural-network-for-monaural-speech-enhancement-2105.14719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-classification-aided-attention-based-neural-network-for-monaural-speech-enhancement-2105.14719"/></url>
<url><loc>https://scifaro.com/en/abs/singing-language-identification-using-a-deep-phonotactic-approach-2105.15014</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-language-identification-using-a-deep-phonotactic-approach-2105.15014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-language-identification-using-a-deep-phonotactic-approach-2105.15014"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-attention-neural-network-for-acoustic-echo-cancellation-2106.00010</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-attention-neural-network-for-acoustic-echo-cancellation-2106.00010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-attention-neural-network-for-acoustic-echo-cancellation-2106.00010"/></url>
<url><loc>https://scifaro.com/en/abs/a-methodology-for-exploring-deep-convolutional-features-in-relation-to-hand-crafted-features-with-an-application-to-music-audio-modeling-2106.00110</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-methodology-for-exploring-deep-convolutional-features-in-relation-to-hand-crafted-features-with-an-application-to-music-audio-modeling-2106.00110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-methodology-for-exploring-deep-convolutional-features-in-relation-to-hand-crafted-features-with-an-application-to-music-audio-modeling-2106.00110"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-adversarial-robustness-for-speaker-verification-by-self-supervised-learning-2106.00273</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-adversarial-robustness-for-speaker-verification-by-self-supervised-learning-2106.00273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-adversarial-robustness-for-speaker-verification-by-self-supervised-learning-2106.00273"/></url>
<url><loc>https://scifaro.com/en/abs/omnizart-a-general-toolbox-for-automatic-music-transcription-2106.00497</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omnizart-a-general-toolbox-for-automatic-music-transcription-2106.00497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omnizart-a-general-toolbox-for-automatic-music-transcription-2106.00497"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-exotic-counterpoint-compositions-2106.00806</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-exotic-counterpoint-compositions-2106.00806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-exotic-counterpoint-compositions-2106.00806"/></url>
<url><loc>https://scifaro.com/en/abs/nvc-net-end-to-end-adversarial-voice-conversion-2106.00992</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nvc-net-end-to-end-adversarial-voice-conversion-2106.00992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nvc-net-end-to-end-adversarial-voice-conversion-2106.00992"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-modality-agnostic-representations-for-music-classification-2106.01149</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-modality-agnostic-representations-for-music-classification-2106.01149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-modality-agnostic-representations-for-music-classification-2106.01149"/></url>
<url><loc>https://scifaro.com/en/abs/sound-to-imagination-an-exploratory-study-on-unsupervised-crossmodal-translation-using-diverse-audiovisual-data-2106.01266</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-to-imagination-an-exploratory-study-on-unsupervised-crossmodal-translation-using-diverse-audiovisual-data-2106.01266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-to-imagination-an-exploratory-study-on-unsupervised-crossmodal-translation-using-diverse-audiovisual-data-2106.01266"/></url>
<url><loc>https://scifaro.com/en/abs/a-preliminary-study-of-a-two-stage-paradigm-for-preserving-speaker-identity-in-dysarthric-voice-conversion-2106.01415</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-preliminary-study-of-a-two-stage-paradigm-for-preserving-speaker-identity-in-dysarthric-voice-conversion-2106.01415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-preliminary-study-of-a-two-stage-paradigm-for-preserving-speaker-identity-in-dysarthric-voice-conversion-2106.01415"/></url>
<url><loc>https://scifaro.com/en/abs/eranns-efficient-residual-audio-neural-networks-for-audio-pattern-recognition-2106.01621</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eranns-efficient-residual-audio-neural-networks-for-audio-pattern-recognition-2106.01621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eranns-efficient-residual-audio-neural-networks-for-audio-pattern-recognition-2106.01621"/></url>
<url><loc>https://scifaro.com/en/abs/language-independent-speech-emotion-and-non-invasive-early-detection-of-neurocognitive-disorder-2106.01684</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-independent-speech-emotion-and-non-invasive-early-detection-of-neurocognitive-disorder-2106.01684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-independent-speech-emotion-and-non-invasive-early-detection-of-neurocognitive-disorder-2106.01684"/></url>
<url><loc>https://scifaro.com/en/abs/heart-sound-classification-considering-additive-noise-and-convolutional-distortion-2106.01865</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heart-sound-classification-considering-additive-noise-and-convolutional-distortion-2106.01865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heart-sound-classification-considering-additive-noise-and-convolutional-distortion-2106.01865"/></url>
<url><loc>https://scifaro.com/en/abs/lyricjam-a-system-for-generating-lyrics-for-live-instrumental-music-2106.01960</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lyricjam-a-system-for-generating-lyrics-for-live-instrumental-music-2106.01960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lyricjam-a-system-for-generating-lyrics-for-live-instrumental-music-2106.01960"/></url>
<url><loc>https://scifaro.com/en/abs/a-database-for-research-on-detection-and-enhancement-of-speech-transmitted-over-hf-links-2106.02472</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-database-for-research-on-detection-and-enhancement-of-speech-transmitted-over-hf-links-2106.02472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-database-for-research-on-detection-and-enhancement-of-speech-transmitted-over-hf-links-2106.02472"/></url>
<url><loc>https://scifaro.com/en/abs/musical-prosody-driven-emotion-classification-interpreting-vocalists-portrayal-of-emotions-through-machine-learning-2106.02556</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-prosody-driven-emotion-classification-interpreting-vocalists-portrayal-of-emotions-through-machine-learning-2106.02556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-prosody-driven-emotion-classification-interpreting-vocalists-portrayal-of-emotions-through-machine-learning-2106.02556"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-dual-channel-target-speaker-separation-for-mobile-voice-communication-2106.02934</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-dual-channel-target-speaker-separation-for-mobile-voice-communication-2106.02934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-dual-channel-target-speaker-separation-for-mobile-voice-communication-2106.02934"/></url>
<url><loc>https://scifaro.com/en/abs/empirical-bayesian-independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-2106.03492</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empirical-bayesian-independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-2106.03492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empirical-bayesian-independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-2106.03492"/></url>
<url><loc>https://scifaro.com/en/abs/active-speaker-detection-as-a-multi-objective-optimization-with-uncertainty-based-multimodal-fusion-2106.03821</loc><lastmod>2021-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-speaker-detection-as-a-multi-objective-optimization-with-uncertainty-based-multimodal-fusion-2106.03821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-speaker-detection-as-a-multi-objective-optimization-with-uncertainty-based-multimodal-fusion-2106.03821"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-data-splits-on-generalization-identifying-covid-19-from-cough-and-context-2106.03851</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-data-splits-on-generalization-identifying-covid-19-from-cough-and-context-2106.03851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-data-splits-on-generalization-identifying-covid-19-from-cough-and-context-2106.03851"/></url>
<url><loc>https://scifaro.com/en/abs/pilot-introducing-transformers-for-probabilistic-sound-event-localization-2106.03903</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pilot-introducing-transformers-for-probabilistic-sound-event-localization-2106.03903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pilot-introducing-transformers-for-probabilistic-sound-event-localization-2106.03903"/></url>
<url><loc>https://scifaro.com/en/abs/byakto-speech-real-time-long-speech-synthesis-with-convolutional-neural-network-transfer-learning-from-english-to-bangla-2106.03937</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/byakto-speech-real-time-long-speech-synthesis-with-convolutional-neural-network-transfer-learning-from-english-to-bangla-2106.03937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/byakto-speech-real-time-long-speech-synthesis-with-convolutional-neural-network-transfer-learning-from-english-to-bangla-2106.03937"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-speech-emotion-recognition-using-multi-scale-cnn-and-attention-2106.04133</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-speech-emotion-recognition-using-multi-scale-cnn-and-attention-2106.04133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-speech-emotion-recognition-using-multi-scale-cnn-and-attention-2106.04133"/></url>
<url><loc>https://scifaro.com/en/abs/broadcasted-residual-learning-for-efficient-keyword-spotting-2106.04140</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/broadcasted-residual-learning-for-efficient-keyword-spotting-2106.04140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/broadcasted-residual-learning-for-efficient-keyword-spotting-2106.04140"/></url>
<url><loc>https://scifaro.com/en/abs/raw-waveform-encoder-with-multi-scale-globally-attentive-locally-recurrent-networks-for-end-to-end-speech-recognition-2106.04275</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raw-waveform-encoder-with-multi-scale-globally-attentive-locally-recurrent-networks-for-end-to-end-speech-recognition-2106.04275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raw-waveform-encoder-with-multi-scale-globally-attentive-locally-recurrent-networks-for-end-to-end-speech-recognition-2106.04275"/></url>
<url><loc>https://scifaro.com/en/abs/nwt-towards-natural-audio-to-video-generation-with-representation-learning-2106.04283</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nwt-towards-natural-audio-to-video-generation-with-representation-learning-2106.04283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nwt-towards-natural-audio-to-video-generation-with-representation-learning-2106.04283"/></url>
<url><loc>https://scifaro.com/en/abs/panacea-cough-sound-based-diagnosis-of-covid-19-for-the-dicova-2021-challenge-2106.04423</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/panacea-cough-sound-based-diagnosis-of-covid-19-for-the-dicova-2021-challenge-2106.04423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/panacea-cough-sound-based-diagnosis-of-covid-19-for-the-dicova-2021-challenge-2106.04423"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speaker-embeddings-for-ultrasound-based-silent-speech-interfaces-2106.04552</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speaker-embeddings-for-ultrasound-based-silent-speech-interfaces-2106.04552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speaker-embeddings-for-ultrasound-based-silent-speech-interfaces-2106.04552"/></url>
<url><loc>https://scifaro.com/en/abs/optimising-hearing-aid-fittings-for-speech-in-noise-with-a-differentiable-hearing-loss-model-2106.04639</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimising-hearing-aid-fittings-for-speech-in-noise-with-a-differentiable-hearing-loss-model-2106.04639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimising-hearing-aid-fittings-for-speech-in-noise-with-a-differentiable-hearing-loss-model-2106.04639"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recovery-for-real-world-self-powered-intermittent-devices-2106.05229</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recovery-for-real-world-self-powered-intermittent-devices-2106.05229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recovery-for-real-world-self-powered-intermittent-devices-2106.05229"/></url>
<url><loc>https://scifaro.com/en/abs/independent-deeply-learned-tensor-analysis-for-determined-audio-source-separation-2106.05529</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-deeply-learned-tensor-analysis-for-determined-audio-source-separation-2106.05529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-deeply-learned-tensor-analysis-for-determined-audio-source-separation-2106.05529"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-and-combination-of-unsupervised-blind-source-separation-techniques-2106.05627</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-and-combination-of-unsupervised-blind-source-separation-techniques-2106.05627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-and-combination-of-unsupervised-blind-source-separation-techniques-2106.05627"/></url>
<url><loc>https://scifaro.com/en/abs/musicbert-symbolic-music-understanding-with-large-scale-pre-training-2106.05630</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicbert-symbolic-music-understanding-with-large-scale-pre-training-2106.05630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicbert-symbolic-music-understanding-with-large-scale-pre-training-2106.05630"/></url>
<url><loc>https://scifaro.com/en/abs/u2-unified-two-pass-bidirectional-end-to-end-model-for-speech-recognition-2106.05642</loc><lastmod>2022-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u2-unified-two-pass-bidirectional-end-to-end-model-for-speech-recognition-2106.05642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u2-unified-two-pass-bidirectional-end-to-end-model-for-speech-recognition-2106.05642"/></url>
<url><loc>https://scifaro.com/en/abs/improving-multi-speaker-tts-prosody-variance-with-a-residual-encoder-and-normalizing-flows-2106.05762</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-multi-speaker-tts-prosody-variance-with-a-residual-encoder-and-normalizing-flows-2106.05762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-multi-speaker-tts-prosody-variance-with-a-residual-encoder-and-normalizing-flows-2106.05762"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-variational-autoencoder-with-adversarial-learning-for-end-to-end-text-to-speech-2106.06103</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-variational-autoencoder-with-adversarial-learning-for-end-to-end-text-to-speech-2106.06103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-variational-autoencoder-with-adversarial-learning-for-end-to-end-text-to-speech-2106.06103"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-large-scale-teacher-student-training-for-on-device-acoustic-models-2106.06126</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-large-scale-teacher-student-training-for-on-device-acoustic-models-2106.06126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-large-scale-teacher-student-training-for-on-device-acoustic-models-2106.06126"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-using-a-binary-classification-model-and-class-centroids-2106.06151</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-a-binary-classification-model-and-class-centroids-2106.06151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-a-binary-classification-model-and-class-centroids-2106.06151"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speaking-styles-in-conversational-text-to-speech-synthesis-with-graph-based-multi-modal-context-modeling-2106.06233</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speaking-styles-in-conversational-text-to-speech-synthesis-with-graph-based-multi-modal-context-modeling-2106.06233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speaking-styles-in-conversational-text-to-speech-synthesis-with-graph-based-multi-modal-context-modeling-2106.06233"/></url>
<url><loc>https://scifaro.com/en/abs/hui-audio-corpus-german-a-high-quality-tts-dataset-2106.06309</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hui-audio-corpus-german-a-high-quality-tts-dataset-2106.06309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hui-audio-corpus-german-a-high-quality-tts-dataset-2106.06309"/></url>
<url><loc>https://scifaro.com/en/abs/visualizing-classifier-adjacency-relations-a-case-study-in-speaker-verification-and-voice-anti-spoofing-2106.06362</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizing-classifier-adjacency-relations-a-case-study-in-speaker-verification-and-voice-anti-spoofing-2106.06362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizing-classifier-adjacency-relations-a-case-study-in-speaker-verification-and-voice-anti-spoofing-2106.06362"/></url>
<url><loc>https://scifaro.com/en/abs/catch-a-waveform-learning-to-generate-audio-from-a-single-short-example-2106.06426</loc><lastmod>2021-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catch-a-waveform-learning-to-generate-audio-from-a-single-short-example-2106.06426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catch-a-waveform-learning-to-generate-audio-from-a-single-short-example-2106.06426"/></url>
<url><loc>https://scifaro.com/en/abs/a-benchmark-of-dynamical-variational-autoencoders-applied-to-speech-spectrogram-modeling-2106.06500</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-benchmark-of-dynamical-variational-autoencoders-applied-to-speech-spectrogram-modeling-2106.06500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-benchmark-of-dynamical-variational-autoencoders-applied-to-speech-spectrogram-modeling-2106.06500"/></url>
<url><loc>https://scifaro.com/en/abs/a-low-compexity-deep-learning-framework-for-acoustic-scene-classification-2106.06838</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-low-compexity-deep-learning-framework-for-acoustic-scene-classification-2106.06838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-low-compexity-deep-learning-framework-for-acoustic-scene-classification-2106.06838"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-frameworks-applied-for-audio-visual-scene-classification-2106.06840</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-frameworks-applied-for-audio-visual-scene-classification-2106.06840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-frameworks-applied-for-audio-visual-scene-classification-2106.06840"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-wavelet-vocoder-based-decomposition-of-parametric-speech-waveform-synthesis-2106.06863</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-wavelet-vocoder-based-decomposition-of-parametric-speech-waveform-synthesis-2106.06863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-wavelet-vocoder-based-decomposition-of-parametric-speech-waveform-synthesis-2106.06863"/></url>
<url><loc>https://scifaro.com/en/abs/gigaspeech-an-evolving-multi-domain-asr-corpus-with-10-000-hours-of-transcribed-audio-2106.06909</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gigaspeech-an-evolving-multi-domain-asr-corpus-with-10-000-hours-of-transcribed-audio-2106.06909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gigaspeech-an-evolving-multi-domain-asr-corpus-with-10-000-hours-of-transcribed-audio-2106.06909"/></url>
<url><loc>https://scifaro.com/en/abs/sounddet-polyphonic-moving-sound-event-detection-and-localization-from-raw-waveform-2106.06969</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sounddet-polyphonic-moving-sound-event-detection-and-localization-from-raw-waveform-2106.06969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sounddet-polyphonic-moving-sound-event-detection-and-localization-from-raw-waveform-2106.06969"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-scattering-ambisonics-three-dimensional-sound-field-estimation-using-interacting-spheres-2106.07157</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-scattering-ambisonics-three-dimensional-sound-field-estimation-using-interacting-spheres-2106.07157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-scattering-ambisonics-three-dimensional-sound-field-estimation-using-interacting-spheres-2106.07157"/></url>
<url><loc>https://scifaro.com/en/abs/fasticarl-fast-incremental-classifier-and-representation-learning-with-efficient-budget-allocation-in-audio-sensing-applications-2106.07268</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fasticarl-fast-incremental-classifier-and-representation-learning-with-efficient-budget-allocation-in-audio-sensing-applications-2106.07268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fasticarl-fast-incremental-classifier-and-representation-learning-with-efficient-budget-allocation-in-audio-sensing-applications-2106.07268"/></url>
<url><loc>https://scifaro.com/en/abs/audio-attacks-and-defenses-against-aed-systems-a-practical-study-2106.07428</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-attacks-and-defenses-against-aed-systems-a-practical-study-2106.07428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-attacks-and-defenses-against-aed-systems-a-practical-study-2106.07428"/></url>
<url><loc>https://scifaro.com/en/abs/crash-raw-audio-score-based-generative-modeling-for-controllable-high-resolution-drum-sound-synthesis-2106.07431</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crash-raw-audio-score-based-generative-modeling-for-controllable-high-resolution-drum-sound-synthesis-2106.07431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crash-raw-audio-score-based-generative-modeling-for-controllable-high-resolution-drum-sound-synthesis-2106.07431"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-mapping-for-visual-to-auditory-sensory-substitution-2106.07448</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-mapping-for-visual-to-auditory-sensory-substitution-2106.07448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-mapping-for-visual-to-auditory-sensory-substitution-2106.07448"/></url>
<url><loc>https://scifaro.com/en/abs/f-t-lstm-based-complex-network-for-joint-acoustic-echo-cancellation-and-speech-enhancement-2106.07577</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f-t-lstm-based-complex-network-for-joint-acoustic-echo-cancellation-and-speech-enhancement-2106.07577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f-t-lstm-based-complex-network-for-joint-acoustic-echo-cancellation-and-speech-enhancement-2106.07577"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-visual-dereverberation-2106.07732</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-visual-dereverberation-2106.07732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-visual-dereverberation-2106.07732"/></url>
<url><loc>https://scifaro.com/en/abs/tracing-back-music-emotion-predictions-to-sound-sources-and-intuitive-perceptual-qualities-2106.07787</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tracing-back-music-emotion-predictions-to-sound-sources-and-intuitive-perceptual-qualities-2106.07787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tracing-back-music-emotion-predictions-to-sound-sources-and-intuitive-perceptual-qualities-2106.07787"/></url>
<url><loc>https://scifaro.com/en/abs/teacher-student-mixit-for-unsupervised-and-semi-supervised-speech-separation-2106.07843</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teacher-student-mixit-for-unsupervised-and-semi-supervised-speech-separation-2106.07843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teacher-student-mixit-for-unsupervised-and-semi-supervised-speech-separation-2106.07843"/></url>
<url><loc>https://scifaro.com/en/abs/towards-the-objective-speech-assessment-of-smoking-status-based-on-voice-features-a-review-of-the-literature-2106.07874</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-the-objective-speech-assessment-of-smoking-status-based-on-voice-features-a-review-of-the-literature-2106.07874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-the-objective-speech-assessment-of-smoking-status-based-on-voice-features-a-review-of-the-literature-2106.07874"/></url>
<url><loc>https://scifaro.com/en/abs/mlp-singer-towards-rapid-parallel-korean-singing-voice-synthesis-2106.07886</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mlp-singer-towards-rapid-parallel-korean-singing-voice-synthesis-2106.07886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mlp-singer-towards-rapid-parallel-korean-singing-voice-synthesis-2106.07886"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-margin-circle-loss-for-speaker-verification-2106.08004</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-margin-circle-loss-for-speaker-verification-2106.08004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-margin-circle-loss-for-speaker-verification-2106.08004"/></url>
<url><loc>https://scifaro.com/en/abs/graph-based-label-propagation-for-semi-supervised-speaker-identification-2106.08207</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-based-label-propagation-for-semi-supervised-speaker-identification-2106.08207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-based-label-propagation-for-semi-supervised-speaker-identification-2106.08207"/></url>
<url><loc>https://scifaro.com/en/abs/pathological-voice-adaptation-with-autoencoder-based-voice-conversion-2106.08427</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pathological-voice-adaptation-with-autoencoder-based-voice-conversion-2106.08427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pathological-voice-adaptation-with-autoencoder-based-voice-conversion-2106.08427"/></url>
<url><loc>https://scifaro.com/en/abs/tonal-frequencies-consonance-dissonance-a-math-bio-intersection-2106.08479</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tonal-frequencies-consonance-dissonance-a-math-bio-intersection-2106.08479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tonal-frequencies-consonance-dissonance-a-math-bio-intersection-2106.08479"/></url>
<url><loc>https://scifaro.com/en/abs/wsrglow-a-glow-based-waveform-generative-model-for-audio-super-resolution-2106.08507</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wsrglow-a-glow-based-waveform-generative-model-for-audio-super-resolution-2106.08507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wsrglow-a-glow-based-waveform-generative-model-for-audio-super-resolution-2106.08507"/></url>
<url><loc>https://scifaro.com/en/abs/drum-aware-ensemble-architecture-for-improved-joint-musical-beat-and-downbeat-tracking-2106.08685</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drum-aware-ensemble-architecture-for-improved-joint-musical-beat-and-downbeat-tracking-2106.08685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drum-aware-ensemble-architecture-for-improved-joint-musical-beat-and-downbeat-tracking-2106.08685"/></url>
<url><loc>https://scifaro.com/en/abs/source-separation-based-data-augmentation-for-improved-joint-beat-and-downbeat-tracking-2106.08703</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-separation-based-data-augmentation-for-improved-joint-beat-and-downbeat-tracking-2106.08703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-separation-based-data-augmentation-for-improved-joint-beat-and-downbeat-tracking-2106.08703"/></url>
<url><loc>https://scifaro.com/en/abs/voicy-zero-shot-non-parallel-voice-conversion-in-noisy-reverberant-environments-2106.08873</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicy-zero-shot-non-parallel-voice-conversion-in-noisy-reverberant-environments-2106.08873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicy-zero-shot-non-parallel-voice-conversion-in-noisy-reverberant-environments-2106.08873"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-conformer-with-prob-sparse-attention-mechanism-for-end-to-endspeech-recognition-2106.09236</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-conformer-with-prob-sparse-attention-mechanism-for-end-to-endspeech-recognition-2106.09236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-conformer-with-prob-sparse-attention-mechanism-for-end-to-endspeech-recognition-2106.09236"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-transfer-learning-from-near-field-to-far-field-speaker-verification-2106.09320</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-transfer-learning-from-near-field-to-far-field-speaker-verification-2106.09320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-transfer-learning-from-near-field-to-far-field-speaker-verification-2106.09320"/></url>
<url><loc>https://scifaro.com/en/abs/improving-on-screen-sound-separation-for-open-domain-videos-with-audio-visual-self-attention-2106.09669</loc><lastmod>2021-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-on-screen-sound-separation-for-open-domain-videos-with-audio-visual-self-attention-2106.09669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-on-screen-sound-separation-for-open-domain-videos-with-audio-visual-self-attention-2106.09669"/></url>
<url><loc>https://scifaro.com/en/abs/improving-performance-of-seen-and-unseen-speech-style-transfer-in-end-to-end-neural-tts-2106.10003</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-performance-of-seen-and-unseen-speech-style-transfer-in-end-to-end-neural-tts-2106.10003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-performance-of-seen-and-unseen-speech-style-transfer-in-end-to-end-neural-tts-2106.10003"/></url>
<url><loc>https://scifaro.com/en/abs/synchronising-speech-segments-with-musical-beats-in-mandarin-and-english-singing-2106.10045</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synchronising-speech-segments-with-musical-beats-in-mandarin-and-english-singing-2106.10045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synchronising-speech-segments-with-musical-beats-in-mandarin-and-english-singing-2106.10045"/></url>
<url><loc>https://scifaro.com/en/abs/improving-robustness-of-one-shot-voice-conversion-with-deep-discriminative-speaker-encoder-2106.10406</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-robustness-of-one-shot-voice-conversion-with-deep-discriminative-speaker-encoder-2106.10406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-robustness-of-one-shot-voice-conversion-with-deep-discriminative-speaker-encoder-2106.10406"/></url>
<url><loc>https://scifaro.com/en/abs/advances-in-speech-vocoding-for-text-to-speech-with-continuous-parameters-2106.10481</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advances-in-speech-vocoding-for-text-to-speech-with-continuous-parameters-2106.10481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advances-in-speech-vocoding-for-text-to-speech-with-continuous-parameters-2106.10481"/></url>
<url><loc>https://scifaro.com/en/abs/eml-online-speech-activity-detection-for-the-fearless-steps-challenge-phase-iii-2106.11075</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eml-online-speech-activity-detection-for-the-fearless-steps-challenge-phase-iii-2106.11075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eml-online-speech-activity-detection-for-the-fearless-steps-challenge-phase-iii-2106.11075"/></url>
<url><loc>https://scifaro.com/en/abs/affinity-mixup-for-weakly-supervised-sound-event-detection-2106.11233</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/affinity-mixup-for-weakly-supervised-sound-event-detection-2106.11233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/affinity-mixup-for-weakly-supervised-sound-event-detection-2106.11233"/></url>
<url><loc>https://scifaro.com/en/abs/do-sound-event-representations-generalize-to-other-audio-tasks-a-case-study-in-audio-transfer-learning-2106.11335</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-sound-event-representations-generalize-to-other-audio-tasks-a-case-study-in-audio-transfer-learning-2106.11335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-sound-event-representations-generalize-to-other-audio-tasks-a-case-study-in-audio-transfer-learning-2106.11335"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-cross-modal-fusion-for-audio-visual-voice-activity-detection-in-musical-video-streams-2106.11411</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-cross-modal-fusion-for-audio-visual-voice-activity-detection-in-musical-video-streams-2106.11411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-cross-modal-fusion-for-audio-visual-voice-activity-detection-in-musical-video-streams-2106.11411"/></url>
<url><loc>https://scifaro.com/en/abs/key-sparse-transformer-for-multimodal-speech-emotion-recognition-2106.11532</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/key-sparse-transformer-for-multimodal-speech-emotion-recognition-2106.11532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/key-sparse-transformer-for-multimodal-speech-emotion-recognition-2106.11532"/></url>
<url><loc>https://scifaro.com/en/abs/multi-accent-speech-separation-with-one-shot-learning-2106.11713</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-accent-speech-separation-with-one-shot-learning-2106.11713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-accent-speech-separation-with-one-shot-learning-2106.11713"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-inference-with-early-exit-in-the-progressive-speech-enhancement-2106.11730</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-inference-with-early-exit-in-the-progressive-speech-enhancement-2106.11730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-inference-with-early-exit-in-the-progressive-speech-enhancement-2106.11730"/></url>
<url><loc>https://scifaro.com/en/abs/glance-and-gaze-a-collaborative-learning-framework-for-single-channel-speech-enhancement-2106.11789</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glance-and-gaze-a-collaborative-learning-framework-for-single-channel-speech-enhancement-2106.11789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glance-and-gaze-a-collaborative-learning-framework-for-single-channel-speech-enhancement-2106.11789"/></url>
<url><loc>https://scifaro.com/en/abs/enrollment-less-training-for-personalized-voice-activity-detection-2106.12132</loc><lastmod>2021-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enrollment-less-training-for-personalized-voice-activity-detection-2106.12132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enrollment-less-training-for-personalized-voice-activity-detection-2106.12132"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-enhancement-using-dynamical-variational-auto-encoders-2106.12271</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-using-dynamical-variational-auto-encoders-2106.12271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-using-dynamical-variational-auto-encoders-2106.12271"/></url>
<url><loc>https://scifaro.com/en/abs/a-simultaneous-denoising-and-dereverberation-framework-with-target-decoupling-2106.12743</loc><lastmod>2021-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simultaneous-denoising-and-dereverberation-framework-with-target-decoupling-2106.12743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simultaneous-denoising-and-dereverberation-framework-with-target-decoupling-2106.12743"/></url>
<url><loc>https://scifaro.com/en/abs/additive-phoneme-aware-margin-softmax-loss-for-language-recognition-2106.12851</loc><lastmod>2021-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/additive-phoneme-aware-margin-softmax-loss-for-language-recognition-2106.12851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/additive-phoneme-aware-margin-softmax-loss-for-language-recognition-2106.12851"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-tts-with-explicit-duration-modelling-for-low-resource-highly-expressive-speech-2106.12896</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-tts-with-explicit-duration-modelling-for-low-resource-highly-expressive-speech-2106.12896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-tts-with-explicit-duration-modelling-for-low-resource-highly-expressive-speech-2106.12896"/></url>
<url><loc>https://scifaro.com/en/abs/speech-is-silver-silence-is-golden-what-do-asvspoof-trained-models-really-learn-2106.12914</loc><lastmod>2021-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-is-silver-silence-is-golden-what-do-asvspoof-trained-models-really-learn-2106.12914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-is-silver-silence-is-golden-what-do-asvspoof-trained-models-really-learn-2106.12914"/></url>
<url><loc>https://scifaro.com/en/abs/sofamyroom-a-fast-and-multiplatform-shoebox-room-simulator-for-binaural-room-impulse-response-dataset-generation-2106.12992</loc><lastmod>2021-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sofamyroom-a-fast-and-multiplatform-shoebox-room-simulator-for-binaural-room-impulse-response-dataset-generation-2106.12992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sofamyroom-a-fast-and-multiplatform-shoebox-room-simulator-for-binaural-room-impulse-response-dataset-generation-2106.12992"/></url>
<url><loc>https://scifaro.com/en/abs/audioclip-extending-clip-to-image-text-and-audio-2106.13043</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioclip-extending-clip-to-image-text-and-audio-2106.13043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioclip-extending-clip-to-image-text-and-audio-2106.13043"/></url>
<url><loc>https://scifaro.com/en/abs/basis-melgan-efficient-neural-vocoder-based-on-audio-decomposition-2106.13419</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/basis-melgan-efficient-neural-vocoder-based-on-audio-decomposition-2106.13419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/basis-melgan-efficient-neural-vocoder-based-on-audio-decomposition-2106.13419"/></url>
<url><loc>https://scifaro.com/en/abs/preliminary-study-on-using-vector-quantization-latent-spaces-for-tts-vc-systems-with-consistent-performance-2106.13479</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preliminary-study-on-using-vector-quantization-latent-spaces-for-tts-vc-systems-with-consistent-performance-2106.13479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preliminary-study-on-using-vector-quantization-latent-spaces-for-tts-vc-systems-with-consistent-performance-2106.13479"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-deep-learning-based-voice-activity-detectors-and-room-impulse-response-models-in-reverberant-environments-2106.13511</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-deep-learning-based-voice-activity-detectors-and-room-impulse-response-models-in-reverberant-environments-2106.13511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-deep-learning-based-voice-activity-detectors-and-room-impulse-response-models-in-reverberant-environments-2106.13511"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-aware-and-channel-wise-attentive-learning-for-text-dependentspeaker-verification-2106.13514</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-aware-and-channel-wise-attentive-learning-for-text-dependentspeaker-verification-2106.13514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-aware-and-channel-wise-attentive-learning-for-text-dependentspeaker-verification-2106.13514"/></url>
<url><loc>https://scifaro.com/en/abs/deep-residual-echo-suppression-with-a-tunable-tradeoff-between-signal-distortion-and-echo-suppression-2106.13531</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-residual-echo-suppression-with-a-tunable-tradeoff-between-signal-distortion-and-echo-suppression-2106.13531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-residual-echo-suppression-with-a-tunable-tradeoff-between-signal-distortion-and-echo-suppression-2106.13531"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-acoustic-echo-cancellation-with-deep-learning-2106.13754</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-acoustic-echo-cancellation-with-deep-learning-2106.13754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-acoustic-echo-cancellation-with-deep-learning-2106.13754"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-for-transient-noisy-environment-based-on-diffusion-nets-2106.13763</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-for-transient-noisy-environment-based-on-diffusion-nets-2106.13763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-for-transient-noisy-environment-based-on-diffusion-nets-2106.13763"/></url>
<url><loc>https://scifaro.com/en/abs/transflower-probabilistic-autoregressive-dance-generation-with-multimodal-attention-2106.13871</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transflower-probabilistic-autoregressive-dance-generation-with-multimodal-attention-2106.13871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transflower-probabilistic-autoregressive-dance-generation-with-multimodal-attention-2106.13871"/></url>
<url><loc>https://scifaro.com/en/abs/listen-as-you-wish-audio-based-event-detection-via-text-to-audio-grounding-in-smart-cities-2106.14136</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-as-you-wish-audio-based-event-detection-via-text-to-audio-grounding-in-smart-cities-2106.14136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-as-you-wish-audio-based-event-detection-via-text-to-audio-grounding-in-smart-cities-2106.14136"/></url>
<url><loc>https://scifaro.com/en/abs/sparsely-overlapped-speech-training-in-the-time-domain-joint-learning-of-target-speech-separation-and-personal-vad-benefits-2106.14371</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsely-overlapped-speech-training-in-the-time-domain-joint-learning-of-target-speech-separation-and-personal-vad-benefits-2106.14371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsely-overlapped-speech-training-in-the-time-domain-joint-learning-of-target-speech-separation-and-personal-vad-benefits-2106.14371"/></url>
<url><loc>https://scifaro.com/en/abs/sounds-of-covid-19-exploring-realistic-performance-of-audio-based-digital-testing-2106.15523</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sounds-of-covid-19-exploring-realistic-performance-of-audio-based-digital-testing-2106.15523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sounds-of-covid-19-exploring-realistic-performance-of-audio-based-digital-testing-2106.15523"/></url>
<url><loc>https://scifaro.com/en/abs/robust-and-interpretable-temporal-convolution-network-for-event-detection-in-lung-sound-recordings-2106.15835</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-and-interpretable-temporal-convolution-network-for-event-detection-in-lung-sound-recordings-2106.15835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-and-interpretable-temporal-convolution-network-for-event-detection-in-lung-sound-recordings-2106.15835"/></url>
<url><loc>https://scifaro.com/en/abs/communication-conditions-in-virtual-acoustic-scenes-in-an-underground-station-2106.15916</loc><lastmod>2025-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/communication-conditions-in-virtual-acoustic-scenes-in-an-underground-station-2106.15916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/communication-conditions-in-virtual-acoustic-scenes-in-an-underground-station-2106.15916"/></url>
<url><loc>https://scifaro.com/en/abs/a-generative-model-for-raw-audio-using-transformer-architectures-2106.16036</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generative-model-for-raw-audio-using-transformer-architectures-2106.16036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generative-model-for-raw-audio-using-transformer-architectures-2106.16036"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2107.00178</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2107.00178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2107.00178"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-singing-voice-separation-2107.00231</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-singing-voice-separation-2107.00231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-singing-voice-separation-2107.00231"/></url>
<url><loc>https://scifaro.com/en/abs/sonority-measurement-using-system-source-and-suprasegmental-information-2107.00297</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonority-measurement-using-system-source-and-suprasegmental-information-2107.00297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonority-measurement-using-system-source-and-suprasegmental-information-2107.00297"/></url>
<url><loc>https://scifaro.com/en/abs/an-objective-evaluation-framework-for-pathological-speech-synthesis-2107.00308</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-objective-evaluation-framework-for-pathological-speech-synthesis-2107.00308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-objective-evaluation-framework-for-pathological-speech-synthesis-2107.00308"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-sample-detection-for-speaker-verification-by-neural-vocoders-2107.00309</loc><lastmod>2022-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-sample-detection-for-speaker-verification-by-neural-vocoders-2107.00309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-sample-detection-for-speaker-verification-by-neural-vocoders-2107.00309"/></url>
<url><loc>https://scifaro.com/en/abs/improving-sound-event-classification-by-increasing-shift-invariance-in-convolutional-neural-networks-2107.00623</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-sound-event-classification-by-increasing-shift-invariance-in-convolutional-neural-networks-2107.00623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-sound-event-classification-by-increasing-shift-invariance-in-convolutional-neural-networks-2107.00623"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-contrastive-learning-for-accented-speech-recognition-2107.00921</loc><lastmod>2021-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-contrastive-learning-for-accented-speech-recognition-2107.00921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-contrastive-learning-for-accented-speech-recognition-2107.00921"/></url>
<url><loc>https://scifaro.com/en/abs/crowdspeech-and-voxdiy-benchmark-datasets-for-crowdsourced-audio-transcription-2107.01091</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdspeech-and-voxdiy-benchmark-datasets-for-crowdsourced-audio-transcription-2107.01091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdspeech-and-voxdiy-benchmark-datasets-for-crowdsourced-audio-transcription-2107.01091"/></url>
<url><loc>https://scifaro.com/en/abs/the-hccl-speaker-verification-system-for-far-field-speaker-verification-challenge-2107.01329</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hccl-speaker-verification-system-for-far-field-speaker-verification-challenge-2107.01329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hccl-speaker-verification-system-for-far-field-speaker-verification-challenge-2107.01329"/></url>
<url><loc>https://scifaro.com/en/abs/a-lottery-ticket-hypothesis-framework-for-low-complexity-device-robust-neural-acoustic-scene-classification-2107.01461</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lottery-ticket-hypothesis-framework-for-low-complexity-device-robust-neural-acoustic-scene-classification-2107.01461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lottery-ticket-hypothesis-framework-for-low-complexity-device-robust-neural-acoustic-scene-classification-2107.01461"/></url>
<url><loc>https://scifaro.com/en/abs/development-of-a-conversation-state-prediction-system-2107.01462</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-of-a-conversation-state-prediction-system-2107.01462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-of-a-conversation-state-prediction-system-2107.01462"/></url>
<url><loc>https://scifaro.com/en/abs/deeprapper-neural-rap-generation-with-rhyme-and-rhythm-modeling-2107.01875</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deeprapper-neural-rap-generation-with-rhyme-and-rhythm-modeling-2107.01875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deeprapper-neural-rap-generation-with-rhyme-and-rhythm-modeling-2107.01875"/></url>
<url><loc>https://scifaro.com/en/abs/adaspeech-3-adaptive-text-to-speech-for-spontaneous-style-2107.02530</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaspeech-3-adaptive-text-to-speech-for-spontaneous-style-2107.02530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaspeech-3-adaptive-text-to-speech-for-spontaneous-style-2107.02530"/></url>
<url><loc>https://scifaro.com/en/abs/self-training-with-noisy-student-model-and-semi-supervised-loss-function-for-dcase-2021-challenge-task-4-2107.02569</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-training-with-noisy-student-model-and-semi-supervised-loss-function-for-dcase-2021-challenge-task-4-2107.02569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-training-with-noisy-student-model-and-semi-supervised-loss-function-for-dcase-2021-challenge-task-4-2107.02569"/></url>
<url><loc>https://scifaro.com/en/abs/msdtron-a-high-capability-multi-speaker-speech-synthesis-system-for-diverse-data-using-characteristic-information-2107.03065</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msdtron-a-high-capability-multi-speaker-speech-synthesis-system-for-diverse-data-using-characteristic-information-2107.03065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msdtron-a-high-capability-multi-speaker-speech-synthesis-system-for-diverse-data-using-characteristic-information-2107.03065"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-auto-encoding-for-packet-loss-concealment-2107.03100</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-auto-encoding-for-packet-loss-concealment-2107.03100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-auto-encoding-for-packet-loss-concealment-2107.03100"/></url>
<url><loc>https://scifaro.com/en/abs/maccif-tdnn-multi-aspect-aggregation-of-channel-and-context-interdependence-features-in-tdnn-based-speaker-verification-2107.03104</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maccif-tdnn-multi-aspect-aggregation-of-channel-and-context-interdependence-features-in-tdnn-based-speaker-verification-2107.03104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maccif-tdnn-multi-aspect-aggregation-of-channel-and-context-interdependence-features-in-tdnn-based-speaker-verification-2107.03104"/></url>
<url><loc>https://scifaro.com/en/abs/vaenar-tts-variational-auto-encoder-based-non-autoregressive-text-to-speech-synthesis-2107.03298</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vaenar-tts-variational-auto-encoder-based-non-autoregressive-text-to-speech-synthesis-2107.03298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vaenar-tts-variational-auto-encoder-based-non-autoregressive-text-to-speech-synthesis-2107.03298"/></url>
<url><loc>https://scifaro.com/en/abs/soundstream-an-end-to-end-neural-audio-codec-2107.03312</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundstream-an-end-to-end-neural-audio-codec-2107.03312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundstream-an-end-to-end-neural-audio-codec-2107.03312"/></url>
<url><loc>https://scifaro.com/en/abs/bumblebee-a-transformer-for-music-2107.03443</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bumblebee-a-transformer-for-music-2107.03443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bumblebee-a-transformer-for-music-2107.03443"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-for-stuttering-identification-review-challenges-and-future-directions-2107.04057</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-for-stuttering-identification-review-challenges-and-future-directions-2107.04057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-for-stuttering-identification-review-challenges-and-future-directions-2107.04057"/></url>
<url><loc>https://scifaro.com/en/abs/easycom-an-augmented-reality-dataset-to-support-algorithms-for-easy-communication-in-noisy-environments-2107.04174</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/easycom-an-augmented-reality-dataset-to-support-algorithms-for-easy-communication-in-noisy-environments-2107.04174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/easycom-an-augmented-reality-dataset-to-support-algorithms-for-easy-communication-in-noisy-environments-2107.04174"/></url>
<url><loc>https://scifaro.com/en/abs/multi-path-convolutional-neural-networks-efficiently-improve-feature-extraction-in-continuous-adventitious-lung-sound-detection-2107.04226</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-path-convolutional-neural-networks-efficiently-improve-feature-extraction-in-continuous-adventitious-lung-sound-detection-2107.04226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-path-convolutional-neural-networks-efficiently-improve-feature-extraction-in-continuous-adventitious-lung-sound-detection-2107.04226"/></url>
<url><loc>https://scifaro.com/en/abs/a-dual-purpose-deep-learning-model-for-auscultated-lung-and-tracheal-sound-analysis-based-on-mixed-set-training-2107.04229</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dual-purpose-deep-learning-model-for-auscultated-lung-and-tracheal-sound-analysis-based-on-mixed-set-training-2107.04229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dual-purpose-deep-learning-model-for-auscultated-lung-and-tracheal-sound-analysis-based-on-mixed-set-training-2107.04229"/></url>
<url><loc>https://scifaro.com/en/abs/variational-information-bottleneck-for-effective-low-resource-audio-classification-2107.04803</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-information-bottleneck-for-effective-low-resource-audio-classification-2107.04803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-information-bottleneck-for-effective-low-resource-audio-classification-2107.04803"/></url>
<url><loc>https://scifaro.com/en/abs/speech2video-cross-modal-distillation-for-speech-to-video-generation-2107.04806</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech2video-cross-modal-distillation-for-speech-to-video-generation-2107.04806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech2video-cross-modal-distillation-for-speech-to-video-generation-2107.04806"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-classification-and-detection-of-bird-sounds-in-the-wild-a-birdclef-2021-solution-2107.04878</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-classification-and-detection-of-bird-sounds-in-the-wild-a-birdclef-2021-solution-2107.04878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-classification-and-detection-of-bird-sounds-in-the-wild-a-birdclef-2021-solution-2107.04878"/></url>
<url><loc>https://scifaro.com/en/abs/reconvat-a-semi-supervised-automatic-music-transcription-framework-for-low-resource-real-world-data-2107.04954</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconvat-a-semi-supervised-automatic-music-transcription-framework-for-low-resource-real-world-data-2107.04954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconvat-a-semi-supervised-automatic-music-transcription-framework-for-low-resource-real-world-data-2107.04954"/></url>
<url><loc>https://scifaro.com/en/abs/pocketvae-a-two-step-model-for-groove-generation-and-control-2107.05009</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pocketvae-a-two-step-model-for-groove-generation-and-control-2107.05009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pocketvae-a-two-step-model-for-groove-generation-and-control-2107.05009"/></url>
<url><loc>https://scifaro.com/en/abs/neural-waveshaping-synthesis-2107.05050</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-waveshaping-synthesis-2107.05050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-waveshaping-synthesis-2107.05050"/></url>
<url><loc>https://scifaro.com/en/abs/bert-like-pre-training-for-symbolic-piano-music-classification-tasks-2107.05223</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bert-like-pre-training-for-symbolic-piano-music-classification-tasks-2107.05223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bert-like-pre-training-for-symbolic-piano-music-classification-tasks-2107.05223"/></url>
<url><loc>https://scifaro.com/en/abs/oriental-language-recognition-olr-2020-summary-and-analysis-2107.05365</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/oriental-language-recognition-olr-2020-summary-and-analysis-2107.05365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/oriental-language-recognition-olr-2020-summary-and-analysis-2107.05365"/></url>
<url><loc>https://scifaro.com/en/abs/dpcrn-dual-path-convolution-recurrent-network-for-single-channel-speech-enhancement-2107.05429</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpcrn-dual-path-convolution-recurrent-network-for-single-channel-speech-enhancement-2107.05429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpcrn-dual-path-convolution-recurrent-network-for-single-channel-speech-enhancement-2107.05429"/></url>
<url><loc>https://scifaro.com/en/abs/calliope-a-polyphonic-music-transformer-2107.05546</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calliope-a-polyphonic-music-transformer-2107.05546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calliope-a-polyphonic-music-transformer-2107.05546"/></url>
<url><loc>https://scifaro.com/en/abs/codified-audio-language-modeling-learns-useful-representations-for-music-information-retrieval-2107.05677</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codified-audio-language-modeling-learns-useful-representations-for-music-information-retrieval-2107.05677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codified-audio-language-modeling-learns-useful-representations-for-music-information-retrieval-2107.05677"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representation-learning-combining-conformer-cpc-with-deep-cluster-for-the-zerospeech-challenge-2021-2107.05899</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representation-learning-combining-conformer-cpc-with-deep-cluster-for-the-zerospeech-challenge-2021-2107.05899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representation-learning-combining-conformer-cpc-with-deep-cluster-for-the-zerospeech-challenge-2021-2107.05899"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-based-end-to-end-speech-recognition-with-rotary-position-embedding-2107.05907</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-based-end-to-end-speech-recognition-with-rotary-position-embedding-2107.05907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-based-end-to-end-speech-recognition-with-rotary-position-embedding-2107.05907"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automatic-instrumentation-by-learning-to-separate-parts-in-symbolic-multitrack-music-2107.05916</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automatic-instrumentation-by-learning-to-separate-parts-in-symbolic-multitrack-music-2107.05916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automatic-instrumentation-by-learning-to-separate-parts-in-symbolic-multitrack-music-2107.05916"/></url>
<url><loc>https://scifaro.com/en/abs/the-piano-inpainting-application-2107.05944</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-piano-inpainting-application-2107.05944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-piano-inpainting-application-2107.05944"/></url>
<url><loc>https://scifaro.com/en/abs/dicova-net-diagnosing-covid-19-using-acoustics-based-on-deep-residual-network-for-the-dicova-challenge-2021-2107.06126</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dicova-net-diagnosing-covid-19-using-acoustics-based-on-deep-residual-network-for-the-dicova-challenge-2021-2107.06126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dicova-net-diagnosing-covid-19-using-acoustics-based-on-deep-residual-network-for-the-dicova-challenge-2021-2107.06126"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-classification-of-musical-instruments-with-a-deep-learning-multi-head-attention-based-model-2107.06231</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-classification-of-musical-instruments-with-a-deep-learning-multi-head-attention-based-model-2107.06231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-classification-of-musical-instruments-with-a-deep-learning-multi-head-attention-based-model-2107.06231"/></url>
<url><loc>https://scifaro.com/en/abs/dance2music-automatic-dance-driven-music-generation-2107.06252</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dance2music-automatic-dance-driven-music-generation-2107.06252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dance2music-automatic-dance-driven-music-generation-2107.06252"/></url>
<url><loc>https://scifaro.com/en/abs/serialized-multi-layer-multi-head-attention-for-neural-speaker-embedding-2107.06493</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serialized-multi-layer-multi-head-attention-for-neural-speaker-embedding-2107.06493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serialized-multi-layer-multi-head-attention-for-neural-speaker-embedding-2107.06493"/></url>
<url><loc>https://scifaro.com/en/abs/the-period-modulated-harmonic-locked-loop-pm-hll-a-low-effort-algorithm-for-rapid-time-domain-multi-periodicity-estimation-2107.06645</loc><lastmod>2021-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-period-modulated-harmonic-locked-loop-pm-hll-a-low-effort-algorithm-for-rapid-time-domain-multi-periodicity-estimation-2107.06645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-period-modulated-harmonic-locked-loop-pm-hll-a-low-effort-algorithm-for-rapid-time-domain-multi-periodicity-estimation-2107.06645"/></url>
<url><loc>https://scifaro.com/en/abs/localization-based-sequential-grouping-for-continuous-speech-separation-2107.06853</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-based-sequential-grouping-for-continuous-speech-separation-2107.06853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-based-sequential-grouping-for-continuous-speech-separation-2107.06853"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-hierarchical-structures-for-few-shot-musical-instrument-recognition-2107.07029</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-hierarchical-structures-for-few-shot-musical-instrument-recognition-2107.07029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-hierarchical-structures-for-few-shot-musical-instrument-recognition-2107.07029"/></url>
<url><loc>https://scifaro.com/en/abs/objective-metrics-to-evaluate-residual-echo-suppression-during-double-talk-2107.07471</loc><lastmod>2021-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-metrics-to-evaluate-residual-echo-suppression-during-double-talk-2107.07471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-metrics-to-evaluate-residual-echo-suppression-during-double-talk-2107.07471"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-bird-species-in-diverse-soundscapes-under-weak-supervision-2107.07728</loc><lastmod>2021-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-bird-species-in-diverse-soundscapes-under-weak-supervision-2107.07728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-bird-species-in-diverse-soundscapes-under-weak-supervision-2107.07728"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-machine-learning-framework-for-teacher-vocal-delivery-evaluation-2107.07956</loc><lastmod>2021-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-machine-learning-framework-for-teacher-vocal-delivery-evaluation-2107.07956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-machine-learning-framework-for-teacher-vocal-delivery-evaluation-2107.07956"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-automated-audio-captioning-using-the-learning-without-forgetting-approach-2107.08028</loc><lastmod>2021-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-automated-audio-captioning-using-the-learning-without-forgetting-approach-2107.08028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-automated-audio-captioning-using-the-learning-without-forgetting-approach-2107.08028"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-stargan-for-emotional-voice-conversion-enhancing-voice-quality-and-data-augmentation-2107.08361</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-stargan-for-emotional-voice-conversion-enhancing-voice-quality-and-data-augmentation-2107.08361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-stargan-for-emotional-voice-conversion-enhancing-voice-quality-and-data-augmentation-2107.08361"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-a-six-hole-recorder-flute-s-response-to-breath-pressure-variations-and-fitting-a-model-2107.08727</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-a-six-hole-recorder-flute-s-response-to-breath-pressure-variations-and-fitting-a-model-2107.08727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-a-six-hole-recorder-flute-s-response-to-breath-pressure-variations-and-fitting-a-model-2107.08727"/></url>
<url><loc>https://scifaro.com/en/abs/over-parameterization-and-generalization-in-audio-classification-2107.08933</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/over-parameterization-and-generalization-in-audio-classification-2107.08933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/over-parameterization-and-generalization-in-audio-classification-2107.08933"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-piano-transcription-with-transformers-2107.09142</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-piano-transcription-with-transformers-2107.09142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-piano-transcription-with-transformers-2107.09142"/></url>
<url><loc>https://scifaro.com/en/abs/music-tempo-estimation-via-neural-networks-a-comparative-analysis-2107.09208</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-tempo-estimation-via-neural-networks-a-comparative-analysis-2107.09208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-tempo-estimation-via-neural-networks-a-comparative-analysis-2107.09208"/></url>
<url><loc>https://scifaro.com/en/abs/robust-deep-learning-frameworks-for-acoustic-scene-and-respiratory-sound-classification-2107.09268</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-deep-learning-frameworks-for-acoustic-scene-and-respiratory-sound-classification-2107.09268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-deep-learning-frameworks-for-acoustic-scene-and-respiratory-sound-classification-2107.09268"/></url>
<url><loc>https://scifaro.com/en/abs/joint-echo-cancellation-and-noise-suppression-based-on-cascaded-magnitude-and-complex-mask-estimation-2107.09298</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-echo-cancellation-and-noise-suppression-based-on-cascaded-magnitude-and-complex-mask-estimation-2107.09298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-echo-cancellation-and-noise-suppression-based-on-cascaded-magnitude-and-complex-mask-estimation-2107.09298"/></url>
<url><loc>https://scifaro.com/en/abs/persa-a-deep-learning-front-end-for-context-agnostic-audio-classification-2107.09311</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/persa-a-deep-learning-front-end-for-context-agnostic-audio-classification-2107.09311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/persa-a-deep-learning-front-end-for-context-agnostic-audio-classification-2107.09311"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-speaker-diarization-system-based-on-spatial-spectrum-2107.09321</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-speaker-diarization-system-based-on-spatial-spectrum-2107.09321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-speaker-diarization-system-based-on-spatial-spectrum-2107.09321"/></url>
<url><loc>https://scifaro.com/en/abs/assessment-of-self-attention-on-learned-features-for-sound-event-localization-and-detection-2107.09388</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessment-of-self-attention-on-learned-features-for-sound-event-localization-and-detection-2107.09388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessment-of-self-attention-on-learned-features-for-sound-event-localization-and-detection-2107.09388"/></url>
<url><loc>https://scifaro.com/en/abs/on-prosody-modeling-for-asr-tts-based-voice-conversion-2107.09477</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-prosody-modeling-for-asr-tts-based-voice-conversion-2107.09477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-prosody-modeling-for-asr-tts-based-voice-conversion-2107.09477"/></url>
<url><loc>https://scifaro.com/en/abs/melody-structure-transfer-network-generating-music-with-separable-self-attention-2107.09877</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-structure-transfer-network-generating-music-with-separable-self-attention-2107.09877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-structure-transfer-network-generating-music-with-separable-self-attention-2107.09877"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-music-plagiarism-detection-revealing-plagiarists-through-bipartite-graph-matching-and-a-comprehensive-large-scale-dataset-2107.09889</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-music-plagiarism-detection-revealing-plagiarists-through-bipartite-graph-matching-and-a-comprehensive-large-scale-dataset-2107.09889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-music-plagiarism-detection-revealing-plagiarists-through-bipartite-graph-matching-and-a-comprehensive-large-scale-dataset-2107.09889"/></url>
<url><loc>https://scifaro.com/en/abs/js-fake-chorales-a-synthetic-dataset-of-polyphonic-music-with-human-annotation-2107.10388</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/js-fake-chorales-a-synthetic-dataset-of-polyphonic-music-with-human-annotation-2107.10388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/js-fake-chorales-a-synthetic-dataset-of-polyphonic-music-with-human-annotation-2107.10388"/></url>
<url><loc>https://scifaro.com/en/abs/starganv2-vc-a-diverse-unsupervised-non-parallel-framework-for-natural-sounding-voice-conversion-2107.10394</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/starganv2-vc-a-diverse-unsupervised-non-parallel-framework-for-natural-sounding-voice-conversion-2107.10394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/starganv2-vc-a-diverse-unsupervised-non-parallel-framework-for-natural-sounding-voice-conversion-2107.10394"/></url>
<url><loc>https://scifaro.com/en/abs/using-umap-to-inspect-audio-data-for-unsupervised-anomaly-detection-under-domain-shift-conditions-2107.10880</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-umap-to-inspect-audio-data-for-unsupervised-anomaly-detection-under-domain-shift-conditions-2107.10880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-umap-to-inspect-audio-data-for-unsupervised-anomaly-detection-under-domain-shift-conditions-2107.10880"/></url>
<url><loc>https://scifaro.com/en/abs/saladnet-self-attentive-multisource-localization-in-the-ambisonics-domain-2107.11066</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saladnet-self-attentive-multisource-localization-in-the-ambisonics-domain-2107.11066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saladnet-self-attentive-multisource-localization-in-the-ambisonics-domain-2107.11066"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speech-enhancement-with-2-d-convolutional-time-frequency-domain-features-and-a-pre-trained-acoustic-model-2107.11222</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speech-enhancement-with-2-d-convolutional-time-frequency-domain-features-and-a-pre-trained-acoustic-model-2107.11222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speech-enhancement-with-2-d-convolutional-time-frequency-domain-features-and-a-pre-trained-acoustic-model-2107.11222"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-automatic-music-transcription-using-tensor-algebra-2107.11250</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-automatic-music-transcription-using-tensor-algebra-2107.11250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-automatic-music-transcription-using-tensor-algebra-2107.11250"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-noise-events-at-shooting-range-using-machine-learning-2107.11453</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-noise-events-at-shooting-range-using-machine-learning-2107.11453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-noise-events-at-shooting-range-using-machine-learning-2107.11453"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-portal-occlusion-for-precomputed-interactive-sound-propagation-2107.11548</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-portal-occlusion-for-precomputed-interactive-sound-propagation-2107.11548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-portal-occlusion-for-precomputed-interactive-sound-propagation-2107.11548"/></url>
<url><loc>https://scifaro.com/en/abs/cough-detection-from-acoustic-signals-for-patient-monitoring-system-2107.11835</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cough-detection-from-acoustic-signals-for-patient-monitoring-system-2107.11835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cough-detection-from-acoustic-signals-for-patient-monitoring-system-2107.11835"/></url>
<url><loc>https://scifaro.com/en/abs/joint-direction-and-proximity-classification-of-overlapping-sound-events-from-binaural-audio-2107.12033</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-direction-and-proximity-classification-of-overlapping-sound-events-from-binaural-audio-2107.12033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-direction-and-proximity-classification-of-overlapping-sound-events-from-binaural-audio-2107.12033"/></url>
<url><loc>https://scifaro.com/en/abs/sveva-fair-a-framework-for-evaluating-fairness-in-speaker-verification-2107.12049</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sveva-fair-a-framework-for-evaluating-fairness-in-speaker-verification-2107.12049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sveva-fair-a-framework-for-evaluating-fairness-in-speaker-verification-2107.12049"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-voice-identity-conversion-manipulating-voice-attributes-by-adversarial-learning-of-structured-disentangled-representations-2107.12346</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-voice-identity-conversion-manipulating-voice-attributes-by-adversarial-learning-of-structured-disentangled-representations-2107.12346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-voice-identity-conversion-manipulating-voice-attributes-by-adversarial-learning-of-structured-disentangled-representations-2107.12346"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-style-transfer-with-prosody-bottleneck-in-neural-speech-synthesis-2107.12562</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-style-transfer-with-prosody-bottleneck-in-neural-speech-synthesis-2107.12562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-style-transfer-with-prosody-bottleneck-in-neural-speech-synthesis-2107.12562"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-score-alignment-using-deep-automatic-music-transcription-2107.12854</loc><lastmod>2022-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-score-alignment-using-deep-automatic-music-transcription-2107.12854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-score-alignment-using-deep-automatic-music-transcription-2107.12854"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-based-non-parallel-speech-enhancement-with-an-adaptive-attention-in-attention-mechanism-2107.13143</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-based-non-parallel-speech-enhancement-with-an-adaptive-attention-in-attention-mechanism-2107.13143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-based-non-parallel-speech-enhancement-with-an-adaptive-attention-in-attention-mechanism-2107.13143"/></url>
<url><loc>https://scifaro.com/en/abs/on-perceived-emotion-in-expressive-piano-performance-further-experimental-evidence-for-the-relevance-of-mid-level-perceptual-features-2107.13231</loc><lastmod>2021-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-perceived-emotion-in-expressive-piano-performance-further-experimental-evidence-for-the-relevance-of-mid-level-perceptual-features-2107.13231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-perceived-emotion-in-expressive-piano-performance-further-experimental-evidence-for-the-relevance-of-mid-level-perceptual-features-2107.13231"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-informed-instrument-assignment-using-a-deep-convolutional-network-with-multiple-kernel-shapes-2107.13617</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-informed-instrument-assignment-using-a-deep-convolutional-network-with-multiple-kernel-shapes-2107.13617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-informed-instrument-assignment-using-a-deep-convolutional-network-with-multiple-kernel-shapes-2107.13617"/></url>
<url><loc>https://scifaro.com/en/abs/blind-room-parameter-estimation-using-multiple-multichannel-speech-recordings-2107.13832</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-room-parameter-estimation-using-multiple-multichannel-speech-recordings-2107.13832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-room-parameter-estimation-using-multiple-multichannel-speech-recordings-2107.13832"/></url>
<url><loc>https://scifaro.com/en/abs/pkspell-data-driven-pitch-spelling-and-key-signature-estimation-2107.14009</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pkspell-data-driven-pitch-spelling-and-key-signature-estimation-2107.14009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pkspell-data-driven-pitch-spelling-and-key-signature-estimation-2107.14009"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-respiratory-rate-from-breath-audio-obtained-through-wearable-microphones-2107.14028</loc><lastmod>2021-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-respiratory-rate-from-breath-audio-obtained-through-wearable-microphones-2107.14028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-respiratory-rate-from-breath-audio-obtained-through-wearable-microphones-2107.14028"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-in-utterance-level-and-segmental-level-spoof-detection-2107.14132</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-in-utterance-level-and-segmental-level-spoof-detection-2107.14132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-in-utterance-level-and-segmental-level-spoof-detection-2107.14132"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-the-covid-19-identification-resnet-cider-on-the-interspeech-covid-19-from-audio-challenges-2107.14549</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-the-covid-19-identification-resnet-cider-on-the-interspeech-covid-19-from-audio-challenges-2107.14549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-the-covid-19-identification-resnet-cider-on-the-interspeech-covid-19-from-audio-challenges-2107.14549"/></url>
<url><loc>https://scifaro.com/en/abs/task3-dcase2021-challenge-sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2107.14561</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task3-dcase2021-challenge-sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2107.14561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task3-dcase2021-challenge-sound-event-localization-and-detection-using-squeeze-excitation-residual-cnns-2107.14561"/></url>
<url><loc>https://scifaro.com/en/abs/dadagp-a-dataset-of-tokenized-guitarpro-songs-for-sequence-models-2107.14653</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dadagp-a-dataset-of-tokenized-guitarpro-songs-for-sequence-models-2107.14653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dadagp-a-dataset-of-tokenized-guitarpro-songs-for-sequence-models-2107.14653"/></url>
<url><loc>https://scifaro.com/en/abs/task-1a-dcase-2021-acoustic-scene-classification-with-mismatch-devices-using-squeeze-excitation-technique-and-low-complexity-constraint-2107.14658</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-1a-dcase-2021-acoustic-scene-classification-with-mismatch-devices-using-squeeze-excitation-technique-and-low-complexity-constraint-2107.14658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-1a-dcase-2021-acoustic-scene-classification-with-mismatch-devices-using-squeeze-excitation-technique-and-low-complexity-constraint-2107.14658"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-voice-reconstruction-for-silent-speech-in-a-tonal-language-2108.00190</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-voice-reconstruction-for-silent-speech-in-a-tonal-language-2108.00190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-voice-reconstruction-for-silent-speech-in-a-tonal-language-2108.00190"/></url>
<url><loc>https://scifaro.com/en/abs/surprisenet-melody-harmonization-conditioning-on-user-controlled-surprise-contours-2108.00378</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surprisenet-melody-harmonization-conditioning-on-user-controlled-surprise-contours-2108.00378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surprisenet-melody-harmonization-conditioning-on-user-controlled-surprise-contours-2108.00378"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-bangla-speech-synthesis-2108.00500</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-bangla-speech-synthesis-2108.00500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-bangla-speech-synthesis-2108.00500"/></url>
<url><loc>https://scifaro.com/en/abs/musical-speech-a-transformer-based-composition-tool-2108.01043</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-speech-a-transformer-based-composition-tool-2108.01043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-speech-a-transformer-based-composition-tool-2108.01043"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptation-with-continuous-vocoder-based-dnn-tts-2108.01154</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptation-with-continuous-vocoder-based-dnn-tts-2108.01154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptation-with-continuous-vocoder-based-dnn-tts-2108.01154"/></url>
<url><loc>https://scifaro.com/en/abs/darkgan-exploiting-knowledge-distillation-for-comprehensible-audio-synthesis-with-gans-2108.01216</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/darkgan-exploiting-knowledge-distillation-for-comprehensible-audio-synthesis-with-gans-2108.01216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/darkgan-exploiting-knowledge-distillation-for-comprehensible-audio-synthesis-with-gans-2108.01216"/></url>
<url><loc>https://scifaro.com/en/abs/the-performance-evaluation-of-attention-based-neural-asr-under-mixed-speech-input-2108.01245</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-performance-evaluation-of-attention-based-neural-asr-under-mixed-speech-input-2108.01245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-performance-evaluation-of-attention-based-neural-asr-under-mixed-speech-input-2108.01245"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-iranian-music-intervals-based-on-pitch-histogram-2108.01283</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-iranian-music-intervals-based-on-pitch-histogram-2108.01283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-iranian-music-intervals-based-on-pitch-histogram-2108.01283"/></url>
<url><loc>https://scifaro.com/en/abs/emopia-a-multi-modal-pop-piano-dataset-for-emotion-recognition-and-emotion-based-music-generation-2108.01374</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emopia-a-multi-modal-pop-piano-dataset-for-emotion-recognition-and-emotion-based-music-generation-2108.01374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emopia-a-multi-modal-pop-piano-dataset-for-emotion-recognition-and-emotion-based-music-generation-2108.01374"/></url>
<url><loc>https://scifaro.com/en/abs/is-disentanglement-enough-on-latent-representations-for-controllable-music-generation-2108.01450</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-disentanglement-enough-on-latent-representations-for-controllable-music-generation-2108.01450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-disentanglement-enough-on-latent-representations-for-controllable-music-generation-2108.01450"/></url>
<url><loc>https://scifaro.com/en/abs/a-benchmarking-initiative-for-audio-domain-music-generation-using-the-freesound-loop-dataset-2108.01576</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-benchmarking-initiative-for-audio-domain-music-generation-using-the-freesound-loop-dataset-2108.01576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-benchmarking-initiative-for-audio-domain-music-generation-using-the-freesound-loop-dataset-2108.01576"/></url>
<url><loc>https://scifaro.com/en/abs/improving-music-performance-assessment-with-contrastive-learning-2108.01711</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-music-performance-assessment-with-contrastive-learning-2108.01711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-music-performance-assessment-with-contrastive-learning-2108.01711"/></url>
<url><loc>https://scifaro.com/en/abs/information-sieve-content-leakage-reduction-in-end-to-end-prosody-for-expressive-speech-synthesis-2108.01831</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/information-sieve-content-leakage-reduction-in-end-to-end-prosody-for-expressive-speech-synthesis-2108.01831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/information-sieve-content-leakage-reduction-in-end-to-end-prosody-for-expressive-speech-synthesis-2108.01831"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-exploitability-of-audio-machine-learning-pipelines-to-surreptitious-adversarial-examples-2108.02010</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-exploitability-of-audio-machine-learning-pipelines-to-surreptitious-adversarial-examples-2108.02010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-exploitability-of-audio-machine-learning-pipelines-to-surreptitious-adversarial-examples-2108.02010"/></url>
<url><loc>https://scifaro.com/en/abs/pervasive-hand-gesture-recognition-for-smartphones-using-non-audible-sound-and-deep-learning-2108.02148</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pervasive-hand-gesture-recognition-for-smartphones-using-non-audible-sound-and-deep-learning-2108.02148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pervasive-hand-gesture-recognition-for-smartphones-using-non-audible-sound-and-deep-learning-2108.02148"/></url>
<url><loc>https://scifaro.com/en/abs/daft-exprt-cross-speaker-prosody-transfer-on-any-text-for-expressive-speech-synthesis-2108.02271</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/daft-exprt-cross-speaker-prosody-transfer-on-any-text-for-expressive-speech-synthesis-2108.02271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/daft-exprt-cross-speaker-prosody-transfer-on-any-text-for-expressive-speech-synthesis-2108.02271"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speech-emotion-recognition-using-transfer-learning-and-spectrogram-augmentation-2108.02510</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speech-emotion-recognition-using-transfer-learning-and-spectrogram-augmentation-2108.02510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speech-emotion-recognition-using-transfer-learning-and-spectrogram-augmentation-2108.02510"/></url>
<url><loc>https://scifaro.com/en/abs/sloclas-a-database-for-joint-sound-localization-and-classification-2108.02539</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sloclas-a-database-for-joint-sound-localization-and-classification-2108.02539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sloclas-a-database-for-joint-sound-localization-and-classification-2108.02539"/></url>
<url><loc>https://scifaro.com/en/abs/performer-identification-from-symbolic-representation-of-music-using-statistical-models-2108.02576</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performer-identification-from-symbolic-representation-of-music-using-statistical-models-2108.02576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performer-identification-from-symbolic-representation-of-music-using-statistical-models-2108.02576"/></url>
<url><loc>https://scifaro.com/en/abs/mstre-net-multistreaming-acoustic-modeling-for-automatic-lyrics-transcription-2108.02625</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mstre-net-multistreaming-acoustic-modeling-for-automatic-lyrics-transcription-2108.02625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mstre-net-multistreaming-acoustic-modeling-for-automatic-lyrics-transcription-2108.02625"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-end-to-end-singing-voice-synthesis-with-encoder-decoder-architectures-2108.03008</loc><lastmod>2021-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-end-to-end-singing-voice-synthesis-with-encoder-decoder-architectures-2108.03008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-end-to-end-singing-voice-synthesis-with-encoder-decoder-architectures-2108.03008"/></url>
<url><loc>https://scifaro.com/en/abs/specmix-a-mixed-sample-data-augmentation-method-for-training-withtime-frequency-domain-features-2108.03020</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specmix-a-mixed-sample-data-augmentation-method-for-training-withtime-frequency-domain-features-2108.03020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specmix-a-mixed-sample-data-augmentation-method-for-training-withtime-frequency-domain-features-2108.03020"/></url>
<url><loc>https://scifaro.com/en/abs/the-eihw-glam-deep-attentive-multi-model-fusion-system-for-cough-based-covid-19-recognition-in-the-dicova-2021-challenge-2108.03041</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-eihw-glam-deep-attentive-multi-model-fusion-system-for-cough-based-covid-19-recognition-in-the-dicova-2021-challenge-2108.03041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-eihw-glam-deep-attentive-multi-model-fusion-system-for-cough-based-covid-19-recognition-in-the-dicova-2021-challenge-2108.03041"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-model-for-zero-shot-music-source-separation-transcription-and-synthesis-2108.03456</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-model-for-zero-shot-music-source-separation-transcription-and-synthesis-2108.03456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-model-for-zero-shot-music-source-separation-transcription-and-synthesis-2108.03456"/></url>
<url><loc>https://scifaro.com/en/abs/cough-detection-using-selected-informative-features-from-audio-signals-2108.03538</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cough-detection-using-selected-informative-features-from-audio-signals-2108.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cough-detection-using-selected-informative-features-from-audio-signals-2108.03538"/></url>
<url><loc>https://scifaro.com/en/abs/deep-single-shot-musical-instrument-identification-using-scalograms-2108.03569</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-single-shot-musical-instrument-identification-using-scalograms-2108.03569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-single-shot-musical-instrument-identification-using-scalograms-2108.03569"/></url>
<url><loc>https://scifaro.com/en/abs/audio-spectral-enhancement-leveraging-autoencoders-for-low-latency-reconstruction-of-long-lossy-audio-sequences-2108.03703</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-spectral-enhancement-leveraging-autoencoders-for-low-latency-reconstruction-of-long-lossy-audio-sequences-2108.03703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-spectral-enhancement-leveraging-autoencoders-for-low-latency-reconstruction-of-long-lossy-audio-sequences-2108.03703"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-localization-using-deep-convolutional-maxout-neural-network-in-persian-speech-recognition-2108.03818</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-localization-using-deep-convolutional-maxout-neural-network-in-persian-speech-recognition-2108.03818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-localization-using-deep-convolutional-maxout-neural-network-in-persian-speech-recognition-2108.03818"/></url>
<url><loc>https://scifaro.com/en/abs/segmentation-free-heart-pathology-detection-using-deep-learning-2108.04139</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segmentation-free-heart-pathology-detection-using-deep-learning-2108.04139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segmentation-free-heart-pathology-detection-using-deep-learning-2108.04139"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-vc-asr-stargan-based-non-parallel-voice-conversion-regularized-by-automatic-speech-recognition-2108.04395</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-vc-asr-stargan-based-non-parallel-voice-conversion-regularized-by-automatic-speech-recognition-2108.04395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-vc-asr-stargan-based-non-parallel-voice-conversion-regularized-by-automatic-speech-recognition-2108.04395"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-investigation-into-audio-pipeline-approaches-for-classifying-bird-species-2108.04449</loc><lastmod>2021-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-investigation-into-audio-pipeline-approaches-for-classifying-bird-species-2108.04449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-investigation-into-audio-pipeline-approaches-for-classifying-bird-species-2108.04449"/></url>
<url><loc>https://scifaro.com/en/abs/depth-infused-binaural-audio-generation-using-hierarchical-cross-modal-attention-2108.04906</loc><lastmod>2021-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/depth-infused-binaural-audio-generation-using-hierarchical-cross-modal-attention-2108.04906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/depth-infused-binaural-audio-generation-using-hierarchical-cross-modal-attention-2108.04906"/></url>
<url><loc>https://scifaro.com/en/abs/robust-feature-learning-on-long-duration-sounds-for-acoustic-scene-classification-2108.05008</loc><lastmod>2021-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-feature-learning-on-long-duration-sounds-for-acoustic-scene-classification-2108.05008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-feature-learning-on-long-duration-sounds-for-acoustic-scene-classification-2108.05008"/></url>
<url><loc>https://scifaro.com/en/abs/variable-length-music-score-infilling-via-xlnet-and-musically-specialized-positional-encoding-2108.05064</loc><lastmod>2021-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variable-length-music-score-infilling-via-xlnet-and-musically-specialized-positional-encoding-2108.05064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variable-length-music-score-infilling-via-xlnet-and-musically-specialized-positional-encoding-2108.05064"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-compensation-between-magnitude-and-phase-in-speech-separation-2108.05470</loc><lastmod>2022-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-compensation-between-magnitude-and-phase-in-speech-separation-2108.05470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-compensation-between-magnitude-and-phase-in-speech-separation-2108.05470"/></url>
<url><loc>https://scifaro.com/en/abs/text-anchor-based-metric-learning-for-small-footprint-keyword-spotting-2108.05516</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-anchor-based-metric-learning-for-small-footprint-keyword-spotting-2108.05516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-anchor-based-metric-learning-for-small-footprint-keyword-spotting-2108.05516"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-voice-activity-detector-for-downsampled-audio-data-an-experiment-report-2108.05553</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-voice-activity-detector-for-downsampled-audio-data-an-experiment-report-2108.05553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-voice-activity-detector-for-downsampled-audio-data-an-experiment-report-2108.05553"/></url>
<url><loc>https://scifaro.com/en/abs/rw-resnet-a-novel-speech-anti-spoofing-model-using-raw-waveform-2108.05684</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rw-resnet-a-novel-speech-anti-spoofing-model-using-raw-waveform-2108.05684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rw-resnet-a-novel-speech-anti-spoofing-model-using-raw-waveform-2108.05684"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-tuning-of-time-frequency-masking-algorithms-for-reverberant-artifact-removal-within-the-cochlear-implant-stimulus-2108.05929</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-tuning-of-time-frequency-masking-algorithms-for-reverberant-artifact-removal-within-the-cochlear-implant-stimulus-2108.05929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-tuning-of-time-frequency-masking-algorithms-for-reverberant-artifact-removal-within-the-cochlear-implant-stimulus-2108.05929"/></url>
<url><loc>https://scifaro.com/en/abs/pruning-vs-xnor-net-a-comprehensive-study-of-deep-learning-for-audio-classification-on-edge-devices-2108.06128</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pruning-vs-xnor-net-a-comprehensive-study-of-deep-learning-for-audio-classification-on-edge-devices-2108.06128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pruning-vs-xnor-net-a-comprehensive-study-of-deep-learning-for-audio-classification-on-edge-devices-2108.06128"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-spectrum-transformation-network-for-acoustic-scene-classification-2108.06401</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-spectrum-transformation-network-for-acoustic-scene-classification-2108.06401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-spectrum-transformation-network-for-acoustic-scene-classification-2108.06401"/></url>
<url><loc>https://scifaro.com/en/abs/nist-sre-cts-superset-a-large-scale-dataset-for-telephony-speaker-recognition-2108.07118</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nist-sre-cts-superset-a-large-scale-dataset-for-telephony-speaker-recognition-2108.07118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nist-sre-cts-superset-a-large-scale-dataset-for-telephony-speaker-recognition-2108.07118"/></url>
<url><loc>https://scifaro.com/en/abs/convolutive-prediction-for-reverberant-speech-separation-2108.07194</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutive-prediction-for-reverberant-speech-separation-2108.07194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutive-prediction-for-reverberant-speech-separation-2108.07194"/></url>
<url><loc>https://scifaro.com/en/abs/convolutive-prediction-for-monaural-speech-dereverberation-and-noisy-reverberant-speaker-separation-2108.07376</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutive-prediction-for-monaural-speech-dereverberation-and-noisy-reverberant-speaker-separation-2108.07376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutive-prediction-for-monaural-speech-dereverberation-and-noisy-reverberant-speaker-separation-2108.07376"/></url>
<url><loc>https://scifaro.com/en/abs/neuralsound-learning-based-modal-sound-synthesis-with-acoustic-transfer-2108.07425</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuralsound-learning-based-modal-sound-synthesis-with-acoustic-transfer-2108.07425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuralsound-learning-based-modal-sound-synthesis-with-acoustic-transfer-2108.07425"/></url>
<url><loc>https://scifaro.com/en/abs/neonatal-bowel-sound-detection-using-convolutional-neural-network-and-laplace-hidden-semi-markov-model-2108.07467</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neonatal-bowel-sound-detection-using-convolutional-neural-network-and-laplace-hidden-semi-markov-model-2108.07467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neonatal-bowel-sound-detection-using-convolutional-neural-network-and-laplace-hidden-semi-markov-model-2108.07467"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-playable-piano-fingering-by-pitch-difference-fingering-matching-model-2108.09058</loc><lastmod>2021-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-playable-piano-fingering-by-pitch-difference-fingering-matching-model-2108.09058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-playable-piano-fingering-by-pitch-difference-fingering-matching-model-2108.09058"/></url>
<url><loc>https://scifaro.com/en/abs/using-growth-transform-dynamical-systems-for-spatio-temporal-data-sonification-2108.09537</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-growth-transform-dynamical-systems-for-spatio-temporal-data-sonification-2108.09537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-growth-transform-dynamical-systems-for-spatio-temporal-data-sonification-2108.09537"/></url>
<url><loc>https://scifaro.com/en/abs/subject-envelope-based-multitype-reconstruction-algorithm-of-speech-samples-of-parkinson-s-disease-2108.09922</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subject-envelope-based-multitype-reconstruction-algorithm-of-speech-samples-of-parkinson-s-disease-2108.09922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subject-envelope-based-multitype-reconstruction-algorithm-of-speech-samples-of-parkinson-s-disease-2108.09922"/></url>
<url><loc>https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-2-analysis-of-musical-pieces-by-the-exceptional-musical-icosahedra-2108.10294</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-2-analysis-of-musical-pieces-by-the-exceptional-musical-icosahedra-2108.10294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-2-analysis-of-musical-pieces-by-the-exceptional-musical-icosahedra-2108.10294"/></url>
<url><loc>https://scifaro.com/en/abs/one-tts-alignment-to-rule-them-all-2108.10447</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-tts-alignment-to-rule-them-all-2108.10447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-tts-alignment-to-rule-them-all-2108.10447"/></url>
<url><loc>https://scifaro.com/en/abs/differential-music-automated-music-generation-using-lstm-networks-with-representation-based-on-melodic-and-harmonic-intervals-2108.10449</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differential-music-automated-music-generation-using-lstm-networks-with-representation-based-on-melodic-and-harmonic-intervals-2108.10449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differential-music-automated-music-generation-using-lstm-networks-with-representation-based-on-melodic-and-harmonic-intervals-2108.10449"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-drill-failure-in-the-small-short-sound-drill-dataset-2108.11089</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-drill-failure-in-the-small-short-sound-drill-dataset-2108.11089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-drill-failure-in-the-small-short-sound-drill-dataset-2108.11089"/></url>
<url><loc>https://scifaro.com/en/abs/accomontage-accompaniment-arrangement-via-phrase-selection-and-style-transfer-2108.11213</loc><lastmod>2021-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accomontage-accompaniment-arrangement-via-phrase-selection-and-style-transfer-2108.11213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accomontage-accompaniment-arrangement-via-phrase-selection-and-style-transfer-2108.11213"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-for-audio-super-resolution-2108.11637</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-for-audio-super-resolution-2108.11637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-for-audio-super-resolution-2108.11637"/></url>
<url><loc>https://scifaro.com/en/abs/determining-the-origin-of-impulsive-noise-events-using-paired-wireless-sound-sensors-2108.11758</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/determining-the-origin-of-impulsive-noise-events-using-paired-wireless-sound-sensors-2108.11758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/determining-the-origin-of-impulsive-noise-events-using-paired-wireless-sound-sensors-2108.11758"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-emotions-and-evaluation-of-customer-satisfaction-from-speech-in-real-world-acoustic-environments-2108.11981</loc><lastmod>2021-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-emotions-and-evaluation-of-customer-satisfaction-from-speech-in-real-world-acoustic-environments-2108.11981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-emotions-and-evaluation-of-customer-satisfaction-from-speech-in-real-world-acoustic-environments-2108.11981"/></url>
<url><loc>https://scifaro.com/en/abs/full-attention-bidirectional-deep-learning-structure-for-single-channel-speech-enhancement-2108.12105</loc><lastmod>2021-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-attention-bidirectional-deep-learning-structure-for-single-channel-speech-enhancement-2108.12105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-attention-bidirectional-deep-learning-structure-for-single-channel-speech-enhancement-2108.12105"/></url>
<url><loc>https://scifaro.com/en/abs/task-aware-warping-factors-in-mask-based-speech-enhancement-2108.12128</loc><lastmod>2021-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-aware-warping-factors-in-mask-based-speech-enhancement-2108.12128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-aware-warping-factors-in-mask-based-speech-enhancement-2108.12128"/></url>
<url><loc>https://scifaro.com/en/abs/separable-temporal-convolution-plus-temporally-pooled-attention-for-lightweight-high-performance-keyword-spotting-2108.12146</loc><lastmod>2021-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separable-temporal-convolution-plus-temporally-pooled-attention-for-lightweight-high-performance-keyword-spotting-2108.12146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separable-temporal-convolution-plus-temporally-pooled-attention-for-lightweight-high-performance-keyword-spotting-2108.12146"/></url>
<url><loc>https://scifaro.com/en/abs/music-composition-with-deep-learning-a-review-2108.12290</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-composition-with-deep-learning-a-review-2108.12290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-composition-with-deep-learning-a-review-2108.12290"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-learning-of-deep-features-for-music-segmentation-2108.12955</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-learning-of-deep-features-for-music-segmentation-2108.12955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-learning-of-deep-features-for-music-segmentation-2108.12955"/></url>
<url><loc>https://scifaro.com/en/abs/armor-a-benchmark-for-meta-evaluation-of-artificial-music-2108.12973</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/armor-a-benchmark-for-meta-evaluation-of-artificial-music-2108.12973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/armor-a-benchmark-for-meta-evaluation-of-artificial-music-2108.12973"/></url>
<url><loc>https://scifaro.com/en/abs/rsknet-mtsp-effective-and-portable-deep-architecture-for-speaker-verification-2108.13249</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rsknet-mtsp-effective-and-portable-deep-architecture-for-speaker-verification-2108.13249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rsknet-mtsp-effective-and-portable-deep-architecture-for-speaker-verification-2108.13249"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-example-devastation-and-detection-on-speech-recognition-system-by-adding-random-noise-2108.13562</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-example-devastation-and-detection-on-speech-recognition-system-by-adding-random-noise-2108.13562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-example-devastation-and-detection-on-speech-recognition-system-by-adding-random-noise-2108.13562"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-based-domain-adaptation-for-robust-speaker-verification-2108.13843</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-based-domain-adaptation-for-robust-speaker-verification-2108.13843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-based-domain-adaptation-for-robust-speaker-verification-2108.13843"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-non-invasive-cough-detection-based-on-accelerometer-and-audio-signals-2109.00103</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-non-invasive-cough-detection-based-on-accelerometer-and-audio-signals-2109.00103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-non-invasive-cough-detection-based-on-accelerometer-and-audio-signals-2109.00103"/></url>
<url><loc>https://scifaro.com/en/abs/ctal-pre-training-cross-modal-transformer-for-audio-and-language-representations-2109.00181</loc><lastmod>2021-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctal-pre-training-cross-modal-transformer-for-audio-and-language-representations-2109.00181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctal-pre-training-cross-modal-transformer-for-audio-and-language-representations-2109.00181"/></url>
<url><loc>https://scifaro.com/en/abs/prior-distribution-design-for-music-bleeding-sound-reduction-based-on-nonnegative-matrix-factorization-2109.00237</loc><lastmod>2021-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prior-distribution-design-for-music-bleeding-sound-reduction-based-on-nonnegative-matrix-factorization-2109.00237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prior-distribution-design-for-music-bleeding-sound-reduction-based-on-nonnegative-matrix-factorization-2109.00237"/></url>
<url><loc>https://scifaro.com/en/abs/a-separable-temporal-convolution-neural-network-with-attention-for-small-footprint-keyword-spotting-2109.00260</loc><lastmod>2021-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-separable-temporal-convolution-neural-network-with-attention-for-small-footprint-keyword-spotting-2109.00260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-separable-temporal-convolution-neural-network-with-attention-for-small-footprint-keyword-spotting-2109.00260"/></url>
<url><loc>https://scifaro.com/en/abs/embedding-and-beamforming-all-neural-causal-beamformer-for-multichannel-speech-enhancement-2109.00265</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedding-and-beamforming-all-neural-causal-beamformer-for-multichannel-speech-enhancement-2109.00265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedding-and-beamforming-all-neural-causal-beamformer-for-multichannel-speech-enhancement-2109.00265"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-multi-centroid-template-matching-algorithm-and-its-application-to-cough-detection-2109.00630</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-multi-centroid-template-matching-algorithm-and-its-application-to-cough-detection-2109.00630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-multi-centroid-template-matching-algorithm-and-its-application-to-cough-detection-2109.00630"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-deep-melody-generation-via-hierarchical-music-structure-representation-2109.00663</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-deep-melody-generation-via-hierarchical-music-structure-representation-2109.00663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-deep-melody-generation-via-hierarchical-music-structure-representation-2109.00663"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-audio-source-separation-with-independent-deeply-learned-matrix-analysis-using-product-of-source-models-2109.00704</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-audio-source-separation-with-independent-deeply-learned-matrix-analysis-using-product-of-source-models-2109.00704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-audio-source-separation-with-independent-deeply-learned-matrix-analysis-using-product-of-source-models-2109.00704"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-audio-generation-via-multi-task-learning-2109.00748</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-audio-generation-via-multi-task-learning-2109.00748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-audio-generation-via-multi-task-learning-2109.00748"/></url>
<url><loc>https://scifaro.com/en/abs/network-modulation-synthesis-new-algorithms-for-generating-musical-audio-using-autoencoder-networks-2109.01948</loc><lastmod>2025-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/network-modulation-synthesis-new-algorithms-for-generating-musical-audio-using-autoencoder-networks-2109.01948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/network-modulation-synthesis-new-algorithms-for-generating-musical-audio-using-autoencoder-networks-2109.01948"/></url>
<url><loc>https://scifaro.com/en/abs/the-speakin-system-for-voxceleb-speaker-recognition-challange-2021-2109.01989</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-speakin-system-for-voxceleb-speaker-recognition-challange-2021-2109.01989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-speakin-system-for-voxceleb-speaker-recognition-challange-2021-2109.01989"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-complex-network-using-cycle-consistent-generative-adversarial-networks-for-speech-enhancement-2109.02011</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-complex-network-using-cycle-consistent-generative-adversarial-networks-for-speech-enhancement-2109.02011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-complex-network-using-cycle-consistent-generative-adversarial-networks-for-speech-enhancement-2109.02011"/></url>
<url><loc>https://scifaro.com/en/abs/the-bytedance-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.02047</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-bytedance-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.02047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-bytedance-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.02047"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-attention-branch-network-with-combined-loss-function-for-automatic-speaker-verification-spoof-detection-2109.02051</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-attention-branch-network-with-combined-loss-function-for-automatic-speaker-verification-spoof-detection-2109.02051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-attention-branch-network-with-combined-loss-function-for-automatic-speaker-verification-spoof-detection-2109.02051"/></url>
<url><loc>https://scifaro.com/en/abs/the-phonexia-voxceleb-speaker-recognition-challenge-2021-system-description-2109.02052</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-phonexia-voxceleb-speaker-recognition-challenge-2021-system-description-2109.02052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-phonexia-voxceleb-speaker-recognition-challenge-2021-system-description-2109.02052"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-transfer-with-variational-auto-encoding-and-cycle-consistent-adversarial-networks-2109.02096</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-transfer-with-variational-auto-encoding-and-cycle-consistent-adversarial-networks-2109.02096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-transfer-with-variational-auto-encoding-and-cycle-consistent-adversarial-networks-2109.02096"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-musical-version-identification-elements-and-challenges-2109.02472</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-musical-version-identification-elements-and-challenges-2109.02472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-musical-version-identification-elements-and-challenges-2109.02472"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-challenges-limitations-and-compatibility-for-audio-restoration-processes-2109.02692</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-challenges-limitations-and-compatibility-for-audio-restoration-processes-2109.02692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-challenges-limitations-and-compatibility-for-audio-restoration-processes-2109.02692"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-soundnet-predicting-semantics-depth-and-motion-with-binaural-sounds-2109.02763</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-soundnet-predicting-semantics-depth-and-motion-with-binaural-sounds-2109.02763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-soundnet-predicting-semantics-depth-and-motion-with-binaural-sounds-2109.02763"/></url>
<url><loc>https://scifaro.com/en/abs/complementing-handcrafted-features-with-raw-waveform-using-a-light-weight-auxiliary-model-2109.02773</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complementing-handcrafted-features-with-raw-waveform-using-a-light-weight-auxiliary-model-2109.02773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complementing-handcrafted-features-with-raw-waveform-using-a-light-weight-auxiliary-model-2109.02773"/></url>
<url><loc>https://scifaro.com/en/abs/fastaudio-a-learnable-audio-front-end-for-spoof-speech-detection-2109.02774</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastaudio-a-learnable-audio-front-end-for-spoof-speech-detection-2109.02774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastaudio-a-learnable-audio-front-end-for-spoof-speech-detection-2109.02774"/></url>
<url><loc>https://scifaro.com/en/abs/fruit-cov-an-efficient-vision-based-framework-for-speedy-detection-and-diagnosis-of-sars-cov-2-infections-through-recorded-cough-sounds-2109.03219</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fruit-cov-an-efficient-vision-based-framework-for-speedy-detection-and-diagnosis-of-sars-cov-2-infections-through-recorded-cough-sounds-2109.03219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fruit-cov-an-efficient-vision-based-framework-for-speedy-detection-and-diagnosis-of-sars-cov-2-infections-through-recorded-cough-sounds-2109.03219"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-sound-source-localization-with-deep-learning-methods-2109.03465</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-sound-source-localization-with-deep-learning-methods-2109.03465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-sound-source-localization-with-deep-learning-methods-2109.03465"/></url>
<url><loc>https://scifaro.com/en/abs/time-alignment-using-lip-images-for-frame-based-electrolaryngeal-voice-conversion-2109.03551</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-alignment-using-lip-images-for-frame-based-electrolaryngeal-voice-conversion-2109.03551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-alignment-using-lip-images-for-frame-based-electrolaryngeal-voice-conversion-2109.03551"/></url>
<url><loc>https://scifaro.com/en/abs/beijing-zkj-npu-speaker-verification-system-for-voxceleb-speaker-recognition-challenge-2021-2109.03568</loc><lastmod>2021-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beijing-zkj-npu-speaker-verification-system-for-voxceleb-speaker-recognition-challenge-2021-2109.03568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beijing-zkj-npu-speaker-verification-system-for-voxceleb-speaker-recognition-challenge-2021-2109.03568"/></url>
<url><loc>https://scifaro.com/en/abs/beamtransformer-microphone-array-based-overlapping-speech-detection-2109.04049</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beamtransformer-microphone-array-based-overlapping-speech-detection-2109.04049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beamtransformer-microphone-array-based-overlapping-speech-detection-2109.04049"/></url>
<url><loc>https://scifaro.com/en/abs/deepemo-deep-learning-for-speech-emotion-recognition-2109.04081</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepemo-deep-learning-for-speech-emotion-recognition-2109.04081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepemo-deep-learning-for-speech-emotion-recognition-2109.04081"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-by-noise-self-supervised-rank-constrained-spatial-covariance-matrix-estimation-via-independent-deeply-learned-matrix-analysis-2109.04658</loc><lastmod>2021-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-by-noise-self-supervised-rank-constrained-spatial-covariance-matrix-estimation-via-independent-deeply-learned-matrix-analysis-2109.04658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-by-noise-self-supervised-rank-constrained-spatial-covariance-matrix-estimation-via-independent-deeply-learned-matrix-analysis-2109.04658"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-channel-combinator-frontend-for-end-to-end-multichannel-far-field-speech-recognition-2109.04783</loc><lastmod>2021-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-channel-combinator-frontend-for-end-to-end-multichannel-far-field-speech-recognition-2109.04783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-channel-combinator-frontend-for-end-to-end-multichannel-far-field-speech-recognition-2109.04783"/></url>
<url><loc>https://scifaro.com/en/abs/decoupling-magnitude-and-phase-estimation-with-deep-resunet-for-music-source-separation-2109.05418</loc><lastmod>2021-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupling-magnitude-and-phase-estimation-with-deep-resunet-for-music-source-separation-2109.05418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupling-magnitude-and-phase-estimation-with-deep-resunet-for-music-source-separation-2109.05418"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-text-to-speech-for-text-based-insertion-in-audio-narration-2109.05426</loc><lastmod>2021-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-for-text-based-insertion-in-audio-narration-2109.05426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-for-text-based-insertion-in-audio-narration-2109.05426"/></url>
<url><loc>https://scifaro.com/en/abs/structure-enhanced-pop-music-generation-via-harmony-aware-learning-2109.06441</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structure-enhanced-pop-music-generation-via-harmony-aware-learning-2109.06441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structure-enhanced-pop-music-generation-via-harmony-aware-learning-2109.06441"/></url>
<url><loc>https://scifaro.com/en/abs/a-machine-learning-framework-for-acoustic-design-assessment-in-early-design-stages-2109.06459</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-machine-learning-framework-for-acoustic-design-assessment-in-early-design-stages-2109.06459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-machine-learning-framework-for-acoustic-design-assessment-in-early-design-stages-2109.06459"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-emotion-disentangling-and-transfer-for-end-to-end-speech-synthesis-2109.06733</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-emotion-disentangling-and-transfer-for-end-to-end-speech-synthesis-2109.06733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-emotion-disentangling-and-transfer-for-end-to-end-speech-synthesis-2109.06733"/></url>
<url><loc>https://scifaro.com/en/abs/bachmmachine-an-interpretable-and-scalable-model-for-algorithmic-harmonization-for-four-part-baroque-chorales-2109.07623</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bachmmachine-an-interpretable-and-scalable-model-for-algorithmic-harmonization-for-four-part-baroque-chorales-2109.07623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bachmmachine-an-interpretable-and-scalable-model-for-algorithmic-harmonization-for-four-part-baroque-chorales-2109.07623"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-placement-agnosticism-improving-the-distance-based-amplitude-panning-algorithm-2109.08704</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-placement-agnosticism-improving-the-distance-based-amplitude-panning-algorithm-2109.08704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-placement-agnosticism-improving-the-distance-based-amplitude-panning-algorithm-2109.08704"/></url>
<url><loc>https://scifaro.com/en/abs/speechnas-towards-better-trade-off-between-latency-and-accuracy-for-large-scale-speaker-verification-2109.08839</loc><lastmod>2022-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechnas-towards-better-trade-off-between-latency-and-accuracy-for-large-scale-speaker-verification-2109.08839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechnas-towards-better-trade-off-between-latency-and-accuracy-for-large-scale-speaker-verification-2109.08839"/></url>
<url><loc>https://scifaro.com/en/abs/ms-sincresnet-joint-learning-of-1d-and-2d-kernels-using-multi-scale-sincnet-and-resnet-for-music-genre-classification-2109.08910</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ms-sincresnet-joint-learning-of-1d-and-2d-kernels-using-multi-scale-sincnet-and-resnet-for-music-genre-classification-2109.08910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ms-sincresnet-joint-learning-of-1d-and-2d-kernels-using-multi-scale-sincnet-and-resnet-for-music-genre-classification-2109.08910"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-data-augmentation-and-deep-attention-based-dilated-convolutional-recurrent-neural-networks-for-speech-emotion-recognition-2109.09026</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-data-augmentation-and-deep-attention-based-dilated-convolutional-recurrent-neural-networks-for-speech-emotion-recognition-2109.09026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-data-augmentation-and-deep-attention-based-dilated-convolutional-recurrent-neural-networks-for-speech-emotion-recognition-2109.09026"/></url>
<url><loc>https://scifaro.com/en/abs/arca23k-an-audio-dataset-for-investigating-open-set-label-noise-2109.09227</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arca23k-an-audio-dataset-for-investigating-open-set-label-noise-2109.09227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arca23k-an-audio-dataset-for-investigating-open-set-label-noise-2109.09227"/></url>
<url><loc>https://scifaro.com/en/abs/telemelody-lyric-to-melody-generation-with-a-template-based-two-stage-method-2109.09617</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/telemelody-lyric-to-melody-generation-with-a-template-based-two-stage-method-2109.09617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/telemelody-lyric-to-melody-generation-with-a-template-based-two-stage-method-2109.09617"/></url>
<url><loc>https://scifaro.com/en/abs/audio-interval-retrieval-using-convolutional-neural-networks-2109.09906</loc><lastmod>2021-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-interval-retrieval-using-convolutional-neural-networks-2109.09906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-interval-retrieval-using-convolutional-neural-networks-2109.09906"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-synthesis-framework-derived-from-industrial-process-control-2109.10455</loc><lastmod>2021-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-synthesis-framework-derived-from-industrial-process-control-2109.10455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-synthesis-framework-derived-from-industrial-process-control-2109.10455"/></url>
<url><loc>https://scifaro.com/en/abs/a-few-shot-learning-approach-for-sound-source-distance-estimation-using-relation-networks-2109.10561</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-few-shot-learning-approach-for-sound-source-distance-estimation-using-relation-networks-2109.10561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-few-shot-learning-approach-for-sound-source-distance-estimation-using-relation-networks-2109.10561"/></url>
<url><loc>https://scifaro.com/en/abs/noisy-to-noisy-voice-conversion-framework-with-denoising-model-2109.10608</loc><lastmod>2021-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisy-to-noisy-voice-conversion-framework-with-denoising-model-2109.10608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisy-to-noisy-voice-conversion-framework-with-denoising-model-2109.10608"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-incremental-text-to-speech-synthesis-with-distilled-context-prediction-network-2109.10724</loc><lastmod>2021-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-incremental-text-to-speech-synthesis-with-distilled-context-prediction-network-2109.10724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-incremental-text-to-speech-synthesis-with-distilled-context-prediction-network-2109.10724"/></url>
<url><loc>https://scifaro.com/en/abs/scenario-aware-speech-recognition-advancements-for-apollo-fearless-steps-chime-4-corpora-2109.11086</loc><lastmod>2021-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scenario-aware-speech-recognition-advancements-for-apollo-fearless-steps-chime-4-corpora-2109.11086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scenario-aware-speech-recognition-advancements-for-apollo-fearless-steps-chime-4-corpora-2109.11086"/></url>
<url><loc>https://scifaro.com/en/abs/unet-tts-improving-unseen-speaker-and-style-transfer-in-one-shot-voice-cloning-2109.11115</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unet-tts-improving-unseen-speaker-and-style-transfer-in-one-shot-voice-cloning-2109.11115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unet-tts-improving-unseen-speaker-and-style-transfer-in-one-shot-voice-cloning-2109.11115"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speaker-diarisation-and-tracking-in-switching-state-space-model-2109.11140</loc><lastmod>2021-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speaker-diarisation-and-tracking-in-switching-state-space-model-2109.11140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speaker-diarisation-and-tracking-in-switching-state-space-model-2109.11140"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-neural-networks-for-one-dimensional-sound-field-predictions-with-parameterized-sources-and-impedance-boundaries-2109.11313</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-neural-networks-for-one-dimensional-sound-field-predictions-with-parameterized-sources-and-impedance-boundaries-2109.11313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-neural-networks-for-one-dimensional-sound-field-predictions-with-parameterized-sources-and-impedance-boundaries-2109.11313"/></url>
<url><loc>https://scifaro.com/en/abs/implementation-of-interactive-tools-for-investigating-fundamental-frequency-response-of-voiced-sounds-to-auditory-stimulation-2109.11594</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementation-of-interactive-tools-for-investigating-fundamental-frequency-response-of-voiced-sounds-to-auditory-stimulation-2109.11594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementation-of-interactive-tools-for-investigating-fundamental-frequency-response-of-voiced-sounds-to-auditory-stimulation-2109.11594"/></url>
<url><loc>https://scifaro.com/en/abs/causal-analysis-of-carnatic-music-a-preliminary-study-2109.11782</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-analysis-of-carnatic-music-a-preliminary-study-2109.11782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-analysis-of-carnatic-music-a-preliminary-study-2109.11782"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-x-vector-based-speaker-anonymization-under-white-box-assessment-2109.11946</loc><lastmod>2021-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-x-vector-based-speaker-anonymization-under-white-box-assessment-2109.11946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-x-vector-based-speaker-anonymization-under-white-box-assessment-2109.11946"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-acquisition-setup-for-data-driven-acoustic-design-2109.12014</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-acquisition-setup-for-data-driven-acoustic-design-2109.12014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-acquisition-setup-for-data-driven-acoustic-design-2109.12014"/></url>
<url><loc>https://scifaro.com/en/abs/parameterized-channel-normalization-for-far-field-deep-speaker-verification-2109.12056</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameterized-channel-normalization-for-far-field-deep-speaker-verification-2109.12056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameterized-channel-normalization-for-far-field-deep-speaker-verification-2109.12056"/></url>
<url><loc>https://scifaro.com/en/abs/optimized-power-normalized-cepstral-coefficients-towards-robust-deep-speaker-verification-2109.12058</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimized-power-normalized-cepstral-coefficients-towards-robust-deep-speaker-verification-2109.12058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimized-power-normalized-cepstral-coefficients-towards-robust-deep-speaker-verification-2109.12058"/></url>
<url><loc>https://scifaro.com/en/abs/rendering-spatial-sound-for-interoperable-experiences-in-the-audio-metaverse-2109.12471</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rendering-spatial-sound-for-interoperable-experiences-in-the-audio-metaverse-2109.12471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rendering-spatial-sound-for-interoperable-experiences-in-the-audio-metaverse-2109.12471"/></url>
<url><loc>https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-3-musical-invariant-and-melakarta-raga-2109.12475</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-3-musical-invariant-and-melakarta-raga-2109.12475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/general-theory-of-music-by-icosahedron-3-musical-invariant-and-melakarta-raga-2109.12475"/></url>
<url><loc>https://scifaro.com/en/abs/joint-magnitude-estimation-and-phase-recovery-using-cycle-in-cycle-gan-for-non-parallel-speech-enhancement-2109.12591</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-magnitude-estimation-and-phase-recovery-using-cycle-in-cycle-gan-for-non-parallel-speech-enhancement-2109.12591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-magnitude-estimation-and-phase-recovery-using-cycle-in-cycle-gan-for-non-parallel-speech-enhancement-2109.12591"/></url>
<url><loc>https://scifaro.com/en/abs/soundata-a-python-library-for-reproducible-use-of-audio-datasets-2109.12690</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundata-a-python-library-for-reproducible-use-of-audio-datasets-2109.12690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundata-a-python-library-for-reproducible-use-of-audio-datasets-2109.12690"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-angle-of-arrival-aoa-of-multiple-echoes-in-a-steering-vector-space-2109.13072</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-angle-of-arrival-aoa-of-multiple-echoes-in-a-steering-vector-space-2109.13072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-angle-of-arrival-aoa-of-multiple-echoes-in-a-steering-vector-space-2109.13072"/></url>
<url><loc>https://scifaro.com/en/abs/inferring-facing-direction-from-voice-signals-2109.13094</loc><lastmod>2021-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inferring-facing-direction-from-voice-signals-2109.13094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inferring-facing-direction-from-voice-signals-2109.13094"/></url>
<url><loc>https://scifaro.com/en/abs/fastmvae2-on-improving-and-accelerating-the-fast-variational-autoencoder-based-source-separation-algorithm-for-determined-mixtures-2109.13496</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastmvae2-on-improving-and-accelerating-the-fast-variational-autoencoder-based-source-separation-algorithm-for-determined-mixtures-2109.13496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastmvae2-on-improving-and-accelerating-the-fast-variational-autoencoder-based-source-separation-algorithm-for-determined-mixtures-2109.13496"/></url>
<url><loc>https://scifaro.com/en/abs/flowvocoder-a-small-footprint-neural-vocoder-based-normalizing-flow-for-speech-synthesis-2109.13675</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowvocoder-a-small-footprint-neural-vocoder-based-normalizing-flow-for-speech-synthesis-2109.13675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowvocoder-a-small-footprint-neural-vocoder-based-normalizing-flow-for-speech-synthesis-2109.13675"/></url>
<url><loc>https://scifaro.com/en/abs/voicefixer-toward-general-speech-restoration-with-neural-vocoder-2109.13731</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicefixer-toward-general-speech-restoration-with-neural-vocoder-2109.13731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicefixer-toward-general-speech-restoration-with-neural-vocoder-2109.13731"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-voice-conversion-with-fast-maximum-likelihood-sampling-scheme-2109.13821</loc><lastmod>2022-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-voice-conversion-with-fast-maximum-likelihood-sampling-scheme-2109.13821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-voice-conversion-with-fast-maximum-likelihood-sampling-scheme-2109.13821"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-semi-supervised-audio-event-classification-using-contrastive-regularization-2109.14508</loc><lastmod>2021-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-semi-supervised-audio-event-classification-using-contrastive-regularization-2109.14508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-semi-supervised-audio-event-classification-using-contrastive-regularization-2109.14508"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-approach-for-sparse-representations-using-the-locally-competitive-algorithm-for-audio-2109.14705</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-approach-for-sparse-representations-using-the-locally-competitive-algorithm-for-audio-2109.14705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-approach-for-sparse-representations-using-the-locally-competitive-algorithm-for-audio-2109.14705"/></url>
<url><loc>https://scifaro.com/en/abs/emergency-vehicles-audio-detection-and-localization-in-autonomous-driving-2109.14797</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emergency-vehicles-audio-detection-and-localization-in-autonomous-driving-2109.14797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emergency-vehicles-audio-detection-and-localization-in-autonomous-driving-2109.14797"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-wav2vec2-for-speaker-recognition-2109.15053</loc><lastmod>2022-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-wav2vec2-for-speaker-recognition-2109.15053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-wav2vec2-for-speaker-recognition-2109.15053"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-algorithmic-biases-for-musical-version-identification-2109.15188</loc><lastmod>2021-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-algorithmic-biases-for-musical-version-identification-2109.15188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-algorithmic-biases-for-musical-version-identification-2109.15188"/></url>
<url><loc>https://scifaro.com/en/abs/spliceout-a-simple-and-efficient-audio-augmentation-method-2110.00046</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spliceout-a-simple-and-efficient-audio-augmentation-method-2110.00046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spliceout-a-simple-and-efficient-audio-augmentation-method-2110.00046"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-layer-wise-self-supervised-learning-for-efficient-speech-domain-adaptation-on-device-2110.00155</loc><lastmod>2021-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-layer-wise-self-supervised-learning-for-efficient-speech-domain-adaptation-on-device-2110.00155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-layer-wise-self-supervised-learning-for-efficient-speech-domain-adaptation-on-device-2110.00155"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-low-distortion-target-estimates-for-improved-speech-enhancement-2110.00570</loc><lastmod>2021-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-low-distortion-target-estimates-for-improved-speech-enhancement-2110.00570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-low-distortion-target-estimates-for-improved-speech-enhancement-2110.00570"/></url>
<url><loc>https://scifaro.com/en/abs/processing-phoneme-specific-segments-for-cleft-lip-and-palate-speech-enhancement-2110.00794</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/processing-phoneme-specific-segments-for-cleft-lip-and-palate-speech-enhancement-2110.00794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/processing-phoneme-specific-segments-for-cleft-lip-and-palate-speech-enhancement-2110.00794"/></url>
<url><loc>https://scifaro.com/en/abs/pl-eesr-perceptual-loss-based-end-to-end-robust-speaker-representation-extraction-2110.00940</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pl-eesr-perceptual-loss-based-end-to-end-robust-speaker-representation-extraction-2110.00940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pl-eesr-perceptual-loss-based-end-to-end-robust-speaker-representation-extraction-2110.00940"/></url>
<url><loc>https://scifaro.com/en/abs/enriching-ontology-with-temporal-commonsense-for-low-resource-audio-tagging-2110.01009</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enriching-ontology-with-temporal-commonsense-for-low-resource-audio-tagging-2110.01009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enriching-ontology-with-temporal-commonsense-for-low-resource-audio-tagging-2110.01009"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-interplay-between-sparsity-naturalness-intelligibility-and-prosody-in-speech-synthesis-2110.01147</loc><lastmod>2021-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-interplay-between-sparsity-naturalness-intelligibility-and-prosody-in-speech-synthesis-2110.01147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-interplay-between-sparsity-naturalness-intelligibility-and-prosody-in-speech-synthesis-2110.01147"/></url>
<url><loc>https://scifaro.com/en/abs/audio-captioning-using-sound-event-detection-2110.01210</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-captioning-using-sound-event-detection-2110.01210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-captioning-using-sound-event-detection-2110.01210"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-evaluation-of-oratory-skills-2110.01367</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-evaluation-of-oratory-skills-2110.01367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-evaluation-of-oratory-skills-2110.01367"/></url>
<url><loc>https://scifaro.com/en/abs/building-a-noisy-audio-dataset-to-evaluate-machine-learning-approaches-for-automatic-speech-recognition-systems-2110.01425</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-a-noisy-audio-dataset-to-evaluate-machine-learning-approaches-for-automatic-speech-recognition-systems-2110.01425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-a-noisy-audio-dataset-to-evaluate-machine-learning-approaches-for-automatic-speech-recognition-systems-2110.01425"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-transformer-an-event-based-end-to-end-model-for-sound-event-detection-2110.02011</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-transformer-an-event-based-end-to-end-model-for-sound-event-detection-2110.02011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-transformer-an-event-based-end-to-end-model-for-sound-event-detection-2110.02011"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-in-unsupervised-acoustic-word-classification-2110.02375</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-in-unsupervised-acoustic-word-classification-2110.02375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-intermediate-convolutional-layers-in-unsupervised-acoustic-word-classification-2110.02375"/></url>
<url><loc>https://scifaro.com/en/abs/voice-aging-with-audio-visual-style-transfer-2110.02411</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-aging-with-audio-visual-style-transfer-2110.02411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-aging-with-audio-visual-style-transfer-2110.02411"/></url>
<url><loc>https://scifaro.com/en/abs/editts-score-based-editing-for-controllable-text-to-speech-2110.02584</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/editts-score-based-editing-for-controllable-text-to-speech-2110.02584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/editts-score-based-editing-for-controllable-text-to-speech-2110.02584"/></url>
<url><loc>https://scifaro.com/en/abs/spell-my-name-keyword-boosted-speech-recognition-2110.02791</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spell-my-name-keyword-boosted-speech-recognition-2110.02791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spell-my-name-keyword-boosted-speech-recognition-2110.02791"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-the-effectiveness-of-phase-for-audio-classification-2110.02878</loc><lastmod>2022-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-the-effectiveness-of-phase-for-audio-classification-2110.02878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-the-effectiveness-of-phase-for-audio-classification-2110.02878"/></url>
<url><loc>https://scifaro.com/en/abs/strengthnet-deep-learning-based-emotion-strength-assessment-for-emotional-speech-synthesis-2110.03156</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/strengthnet-deep-learning-based-emotion-strength-assessment-for-emotional-speech-synthesis-2110.03156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/strengthnet-deep-learning-based-emotion-strength-assessment-for-emotional-speech-synthesis-2110.03156"/></url>
<url><loc>https://scifaro.com/en/abs/transferring-voice-knowledge-for-acoustic-event-detection-an-empirical-study-2110.03174</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferring-voice-knowledge-for-acoustic-event-detection-an-empirical-study-2110.03174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferring-voice-knowledge-for-acoustic-event-detection-an-empirical-study-2110.03174"/></url>
<url><loc>https://scifaro.com/en/abs/attention-is-all-you-need-good-embeddings-with-statistics-are-enough-large-scale-audio-understanding-without-transformers-convolutions-berts-mixers-attention-rnns-or-2110.03183</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-is-all-you-need-good-embeddings-with-statistics-are-enough-large-scale-audio-understanding-without-transformers-convolutions-berts-mixers-attention-rnns-or-2110.03183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-is-all-you-need-good-embeddings-with-statistics-are-enough-large-scale-audio-understanding-without-transformers-convolutions-berts-mixers-attention-rnns-or-2110.03183"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-guided-by-semantic-contexts-of-scenes-2110.03243</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-guided-by-semantic-contexts-of-scenes-2110.03243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-guided-by-semantic-contexts-of-scenes-2110.03243"/></url>
<url><loc>https://scifaro.com/en/abs/a-cough-based-deep-learning-framework-for-detecting-covid-19-2110.03251</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cough-based-deep-learning-framework-for-detecting-covid-19-2110.03251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cough-based-deep-learning-framework-for-detecting-covid-19-2110.03251"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-blind-source-separation-framework-towards-maximum-signal-to-interference-ratio-2110.03272</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-blind-source-separation-framework-towards-maximum-signal-to-interference-ratio-2110.03272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-blind-source-separation-framework-towards-maximum-signal-to-interference-ratio-2110.03272"/></url>
<url><loc>https://scifaro.com/en/abs/wenetspeech-a-10000-hours-multi-domain-mandarin-corpus-for-speech-recognition-2110.03370</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wenetspeech-a-10000-hours-multi-domain-mandarin-corpus-for-speech-recognition-2110.03370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wenetspeech-a-10000-hours-multi-domain-mandarin-corpus-for-speech-recognition-2110.03370"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-the-dimensionality-reduction-of-speaker-embeddings-for-speaker-diarisation-disentangling-noise-and-informing-speech-activity-2110.03380</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-the-dimensionality-reduction-of-speaker-embeddings-for-speaker-diarisation-disentangling-noise-and-informing-speech-activity-2110.03380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-the-dimensionality-reduction-of-speaker-embeddings-for-speaker-diarisation-disentangling-noise-and-informing-speech-activity-2110.03380"/></url>
<url><loc>https://scifaro.com/en/abs/gantron-emotional-speech-synthesis-with-generative-adversarial-networks-2110.03390</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gantron-emotional-speech-synthesis-with-generative-adversarial-networks-2110.03390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gantron-emotional-speech-synthesis-with-generative-adversarial-networks-2110.03390"/></url>
<url><loc>https://scifaro.com/en/abs/serab-a-multi-lingual-benchmark-for-speech-emotion-recognition-2110.03414</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serab-a-multi-lingual-benchmark-for-speech-emotion-recognition-2110.03414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serab-a-multi-lingual-benchmark-for-speech-emotion-recognition-2110.03414"/></url>
<url><loc>https://scifaro.com/en/abs/prototype-learning-for-interpretable-respiratory-sound-analysis-2110.03536</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prototype-learning-for-interpretable-respiratory-sound-analysis-2110.03536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prototype-learning-for-interpretable-respiratory-sound-analysis-2110.03536"/></url>
<url><loc>https://scifaro.com/en/abs/voice-reenactment-with-f0-and-timing-constraints-and-adversarial-learning-of-conversions-2110.03744</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-reenactment-with-f0-and-timing-constraints-and-adversarial-learning-of-conversions-2110.03744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-reenactment-with-f0-and-timing-constraints-and-adversarial-learning-of-conversions-2110.03744"/></url>
<url><loc>https://scifaro.com/en/abs/wake-cough-cough-spotting-and-cougher-identification-for-personalised-long-term-cough-monitoring-2110.03771</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wake-cough-cough-spotting-and-cougher-identification-for-personalised-long-term-cough-monitoring-2110.03771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wake-cough-cough-spotting-and-cougher-identification-for-personalised-long-term-cough-monitoring-2110.03771"/></url>
<url><loc>https://scifaro.com/en/abs/fast-rir-fast-neural-diffuse-room-impulse-response-generator-2110.04057</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-rir-fast-neural-diffuse-room-impulse-response-generator-2110.04057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-rir-fast-neural-diffuse-room-impulse-response-generator-2110.04057"/></url>
<url><loc>https://scifaro.com/en/abs/affective-burst-detection-from-speech-using-kernel-fusion-dilated-convolutional-neural-networks-2110.04091</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/affective-burst-detection-from-speech-using-kernel-fusion-dilated-convolutional-neural-networks-2110.04091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/affective-burst-detection-from-speech-using-kernel-fusion-dilated-convolutional-neural-networks-2110.04091"/></url>
<url><loc>https://scifaro.com/en/abs/auto-dsp-learning-to-optimize-acoustic-echo-cancellers-2110.04284</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-dsp-learning-to-optimize-acoustic-echo-cancellers-2110.04284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-dsp-learning-to-optimize-acoustic-echo-cancellers-2110.04284"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lightweight-applications-asymmetric-enroll-verify-structure-for-speaker-verification-2110.04438</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lightweight-applications-asymmetric-enroll-verify-structure-for-speaker-verification-2110.04438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lightweight-applications-asymmetric-enroll-verify-structure-for-speaker-verification-2110.04438"/></url>
<url><loc>https://scifaro.com/en/abs/using-multiple-reference-audios-and-style-embedding-constraints-for-speech-synthesis-2110.04451</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-multiple-reference-audios-and-style-embedding-constraints-for-speech-synthesis-2110.04451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-multiple-reference-audios-and-style-embedding-constraints-for-speech-synthesis-2110.04451"/></url>
<url><loc>https://scifaro.com/en/abs/a-mutual-learning-framework-for-few-shot-sound-event-detection-2110.04474</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mutual-learning-framework-for-few-shot-sound-event-detection-2110.04474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mutual-learning-framework-for-few-shot-sound-event-detection-2110.04474"/></url>
<url><loc>https://scifaro.com/en/abs/pama-tts-progression-aware-monotonic-attention-for-stable-seq2seq-tts-with-accurate-phoneme-duration-control-2110.04486</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pama-tts-progression-aware-monotonic-attention-for-stable-seq2seq-tts-with-accurate-phoneme-duration-control-2110.04486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pama-tts-progression-aware-monotonic-attention-for-stable-seq2seq-tts-with-accurate-phoneme-duration-control-2110.04486"/></url>
<url><loc>https://scifaro.com/en/abs/universal-paralinguistic-speech-representations-using-self-supervised-conformers-2110.04621</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-paralinguistic-speech-representations-using-self-supervised-conformers-2110.04621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-paralinguistic-speech-representations-using-self-supervised-conformers-2110.04621"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-on-device-detection-of-device-directed-speech-from-voice-and-touch-based-invocation-2110.04656</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-on-device-detection-of-device-directed-speech-from-voice-and-touch-based-invocation-2110.04656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-on-device-detection-of-device-directed-speech-from-voice-and-touch-based-invocation-2110.04656"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-techniques-for-biomarker-discovery-in-voice-signal-2110.04678</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-techniques-for-biomarker-discovery-in-voice-signal-2110.04678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-techniques-for-biomarker-discovery-in-voice-signal-2110.04678"/></url>
<url><loc>https://scifaro.com/en/abs/can-audio-captions-be-evaluated-with-image-caption-metrics-2110.04684</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-audio-captions-be-evaluated-with-image-caption-metrics-2110.04684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-audio-captions-be-evaluated-with-image-caption-metrics-2110.04684"/></url>
<url><loc>https://scifaro.com/en/abs/towards-high-fidelity-singing-voice-conversion-with-acoustic-reference-and-contrastive-predictive-coding-2110.04754</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-high-fidelity-singing-voice-conversion-with-acoustic-reference-and-contrastive-predictive-coding-2110.04754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-high-fidelity-singing-voice-conversion-with-acoustic-reference-and-contrastive-predictive-coding-2110.04754"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-with-metadata-for-music-mood-classification-2110.04765</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-with-metadata-for-music-mood-classification-2110.04765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-with-metadata-for-music-mood-classification-2110.04765"/></url>
<url><loc>https://scifaro.com/en/abs/laughnet-synthesizing-laughter-utterances-from-waveform-silhouettes-and-a-single-laughter-example-2110.04946</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/laughnet-synthesizing-laughter-utterances-from-waveform-silhouettes-and-a-single-laughter-example-2110.04946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/laughnet-synthesizing-laughter-utterances-from-waveform-silhouettes-and-a-single-laughter-example-2110.04946"/></url>
<url><loc>https://scifaro.com/en/abs/kernel-learning-for-sound-field-estimation-with-l1-and-l2-regularizations-2110.04972</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kernel-learning-for-sound-field-estimation-with-l1-and-l2-regularizations-2110.04972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kernel-learning-for-sound-field-estimation-with-l1-and-l2-regularizations-2110.04972"/></url>
<url><loc>https://scifaro.com/en/abs/melons-generating-melody-with-long-term-structure-using-transformers-and-structure-graph-2110.05020</loc><lastmod>2021-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melons-generating-melody-with-long-term-structure-using-transformers-and-structure-graph-2110.05020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melons-generating-melody-with-long-term-structure-using-transformers-and-structure-graph-2110.05020"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-preservation-in-singing-voice-synthesis-2110.05033</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-preservation-in-singing-voice-synthesis-2110.05033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-preservation-in-singing-voice-synthesis-2110.05033"/></url>
<url><loc>https://scifaro.com/en/abs/multi-query-multi-head-attention-pooling-and-inter-topk-penalty-for-speaker-verification-2110.05042</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-query-multi-head-attention-pooling-and-inter-topk-penalty-for-speaker-verification-2110.05042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-query-multi-head-attention-pooling-and-inter-topk-penalty-for-speaker-verification-2110.05042"/></url>
<url><loc>https://scifaro.com/en/abs/source-mixing-and-separation-robust-audio-steganography-2110.05054</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-mixing-and-separation-robust-audio-steganography-2110.05054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-mixing-and-separation-robust-audio-steganography-2110.05054"/></url>
<url><loc>https://scifaro.com/en/abs/amicable-examples-for-informed-source-separation-2110.05059</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amicable-examples-for-informed-source-separation-2110.05059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amicable-examples-for-informed-source-separation-2110.05059"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-training-of-audio-transformers-with-patchout-2110.05069</loc><lastmod>2023-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-training-of-audio-transformers-with-patchout-2110.05069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-training-of-audio-transformers-with-patchout-2110.05069"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-resolution-front-end-for-end-to-end-speech-anti-spoofing-2110.05087</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-resolution-front-end-for-end-to-end-speech-anti-spoofing-2110.05087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-resolution-front-end-for-end-to-end-speech-anti-spoofing-2110.05087"/></url>
<url><loc>https://scifaro.com/en/abs/vocadito-a-dataset-of-solo-vocals-with-f-0-note-and-lyric-annotations-2110.05580</loc><lastmod>2021-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocadito-a-dataset-of-solo-vocals-with-f-0-note-and-lyric-annotations-2110.05580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocadito-a-dataset-of-solo-vocals-with-f-0-note-and-lyric-annotations-2110.05580"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-latent-space-disentanglement-in-the-presence-of-interdependent-attributes-2110.05587</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-latent-space-disentanglement-in-the-presence-of-interdependent-attributes-2110.05587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-latent-space-disentanglement-in-the-presence-of-interdependent-attributes-2110.05587"/></url>
<url><loc>https://scifaro.com/en/abs/foster-strengths-and-circumvent-weaknesses-a-speech-enhancement-framework-with-two-branch-collaborative-learning-2110.05713</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foster-strengths-and-circumvent-weaknesses-a-speech-enhancement-framework-with-two-branch-collaborative-learning-2110.05713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foster-strengths-and-circumvent-weaknesses-a-speech-enhancement-framework-with-two-branch-collaborative-learning-2110.05713"/></url>
<url><loc>https://scifaro.com/en/abs/music-sentiment-transfer-2110.05765</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-sentiment-transfer-2110.05765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-sentiment-transfer-2110.05765"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-self-supervised-speech-representation-learning-for-automatic-speaker-verification-2110.05777</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-self-supervised-speech-representation-learning-for-automatic-speaker-verification-2110.05777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-self-supervised-speech-representation-learning-for-automatic-speaker-verification-2110.05777"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-tts-models-for-new-speakers-using-transfer-learning-2110.05798</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-tts-models-for-new-speakers-using-transfer-learning-2110.05798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-tts-models-for-new-speakers-using-transfer-learning-2110.05798"/></url>
<url><loc>https://scifaro.com/en/abs/metricgan-u-unsupervised-speech-enhancement-dereverberation-based-only-on-noisy-reverberated-speech-2110.05866</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metricgan-u-unsupervised-speech-enhancement-dereverberation-based-only-on-noisy-reverberated-speech-2110.05866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metricgan-u-unsupervised-speech-enhancement-dereverberation-based-only-on-noisy-reverberated-speech-2110.05866"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-narrow-band-deep-speech-separation-with-full-band-permutation-invariant-training-2110.05966</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-narrow-band-deep-speech-separation-with-full-band-permutation-invariant-training-2110.05966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-narrow-band-deep-speech-separation-with-full-band-permutation-invariant-training-2110.05966"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-far-field-speaker-verification-with-large-scale-ad-hoc-microphone-arrays-2110.05975</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-far-field-speaker-verification-with-large-scale-ad-hoc-microphone-arrays-2110.05975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-far-field-speaker-verification-with-large-scale-ad-hoc-microphone-arrays-2110.05975"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-performance-of-automated-audio-captioning-via-integrating-the-acoustic-and-semantic-information-2110.06100</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-performance-of-automated-audio-captioning-via-integrating-the-acoustic-and-semantic-information-2110.06100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-performance-of-automated-audio-captioning-via-integrating-the-acoustic-and-semantic-information-2110.06100"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-diagnosis-from-cough-acoustics-using-convnets-and-data-augmentation-2110.06123</loc><lastmod>2026-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-diagnosis-from-cough-acoustics-using-convnets-and-data-augmentation-2110.06123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-diagnosis-from-cough-acoustics-using-convnets-and-data-augmentation-2110.06123"/></url>
<url><loc>https://scifaro.com/en/abs/s3prl-vc-open-source-voice-conversion-framework-with-self-supervised-speech-representations-2110.06280</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s3prl-vc-open-source-voice-conversion-framework-with-self-supervised-speech-representations-2110.06280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s3prl-vc-open-source-voice-conversion-framework-with-self-supervised-speech-representations-2110.06280"/></url>
<url><loc>https://scifaro.com/en/abs/an-annihilating-filter-based-doa-estimation-for-uniform-linear-array-2110.06323</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-annihilating-filter-based-doa-estimation-for-uniform-linear-array-2110.06323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-annihilating-filter-based-doa-estimation-for-uniform-linear-array-2110.06323"/></url>
<url><loc>https://scifaro.com/en/abs/algorithmic-composition-by-autonomous-systems-with-multiple-time-scales-2110.06371</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/algorithmic-composition-by-autonomous-systems-with-multiple-time-scales-2110.06371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/algorithmic-composition-by-autonomous-systems-with-multiple-time-scales-2110.06371"/></url>
<url><loc>https://scifaro.com/en/abs/dual-branch-attention-in-attention-transformer-for-single-channel-speech-enhancement-2110.06467</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-branch-attention-in-attention-transformer-for-single-channel-speech-enhancement-2110.06467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-branch-attention-in-attention-transformer-for-single-channel-speech-enhancement-2110.06467"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-with-deep-equilibrium-models-2110.06494</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-with-deep-equilibrium-models-2110.06494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-with-deep-equilibrium-models-2110.06494"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-data-augmentation-with-simulated-room-impulse-responses-for-sound-event-localization-and-detection-2110.06501</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-data-augmentation-with-simulated-room-impulse-responses-for-sound-event-localization-and-detection-2110.06501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-data-augmentation-with-simulated-room-impulse-responses-for-sound-event-localization-and-detection-2110.06501"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-dj-transitions-with-differentiable-audio-effects-and-generative-adversarial-networks-2110.06525</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-dj-transitions-with-differentiable-audio-effects-and-generative-adversarial-networks-2110.06525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-dj-transitions-with-differentiable-audio-effects-and-generative-adversarial-networks-2110.06525"/></url>
<url><loc>https://scifaro.com/en/abs/simple-attention-module-based-speaker-verification-with-iterative-noisy-label-detection-2110.06534</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simple-attention-module-based-speaker-verification-with-iterative-noisy-label-detection-2110.06534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simple-attention-module-based-speaker-verification-with-iterative-noisy-label-detection-2110.06534"/></url>
<url><loc>https://scifaro.com/en/abs/eihw-mtg-dicova-2021-challenge-system-report-2110.06543</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eihw-mtg-dicova-2021-challenge-system-report-2110.06543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eihw-mtg-dicova-2021-challenge-system-report-2110.06543"/></url>
<url><loc>https://scifaro.com/en/abs/duality-temporal-channel-frequency-attention-enhanced-speaker-representation-learning-2110.06565</loc><lastmod>2021-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/duality-temporal-channel-frequency-attention-enhanced-speaker-representation-learning-2110.06565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/duality-temporal-channel-frequency-attention-enhanced-speaker-representation-learning-2110.06565"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-translation-of-human-neural-activity-to-speech-with-a-dual-dual-generative-adversarial-network-2110.06634</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-translation-of-human-neural-activity-to-speech-with-a-dual-dual-generative-adversarial-network-2110.06634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-translation-of-human-neural-activity-to-speech-with-a-dual-dual-generative-adversarial-network-2110.06634"/></url>
<url><loc>https://scifaro.com/en/abs/singer-separation-for-karaoke-content-generation-2110.06707</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singer-separation-for-karaoke-content-generation-2110.06707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singer-separation-for-karaoke-content-generation-2110.06707"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-positional-encoding-approaches-for-audio-spectrogram-transformers-2110.06999</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-positional-encoding-approaches-for-audio-spectrogram-transformers-2110.06999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-positional-encoding-approaches-for-audio-spectrogram-transformers-2110.06999"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-svd-and-factorized-tdnn-approaches-for-speech-to-text-2110.07027</loc><lastmod>2021-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-svd-and-factorized-tdnn-approaches-for-speech-to-text-2110.07027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-svd-and-factorized-tdnn-approaches-for-speech-to-text-2110.07027"/></url>
<url><loc>https://scifaro.com/en/abs/improve-cross-lingual-voice-cloning-using-low-quality-code-switched-data-2110.07210</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improve-cross-lingual-voice-cloning-using-low-quality-code-switched-data-2110.07210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improve-cross-lingual-voice-cloning-using-low-quality-code-switched-data-2110.07210"/></url>
<url><loc>https://scifaro.com/en/abs/specsingan-sound-effect-variation-synthesis-using-single-image-gans-2110.07311</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specsingan-sound-effect-variation-synthesis-using-single-image-gans-2110.07311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specsingan-sound-effect-variation-synthesis-using-single-image-gans-2110.07311"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-based-self-supervised-learning-for-non-speech-audio-tasks-2110.07313</loc><lastmod>2022-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-based-self-supervised-learning-for-non-speech-audio-tasks-2110.07313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-based-self-supervised-learning-for-non-speech-audio-tasks-2110.07313"/></url>
<url><loc>https://scifaro.com/en/abs/m2met-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2110.07393</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2met-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2110.07393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2met-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2110.07393"/></url>
<url><loc>https://scifaro.com/en/abs/humbugdb-a-large-scale-acoustic-mosquito-dataset-2110.07607</loc><lastmod>2021-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/humbugdb-a-large-scale-acoustic-mosquito-dataset-2110.07607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/humbugdb-a-large-scale-acoustic-mosquito-dataset-2110.07607"/></url>
<url><loc>https://scifaro.com/en/abs/using-deepproblog-to-perform-complex-event-processing-on-an-audio-stream-2110.08090</loc><lastmod>2021-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-deepproblog-to-perform-complex-event-processing-on-an-audio-stream-2110.08090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-deepproblog-to-perform-complex-event-processing-on-an-audio-stream-2110.08090"/></url>
<url><loc>https://scifaro.com/en/abs/towards-identity-preserving-normal-to-dysarthric-voice-conversion-2110.08213</loc><lastmod>2021-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-identity-preserving-normal-to-dysarthric-voice-conversion-2110.08213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-identity-preserving-normal-to-dysarthric-voice-conversion-2110.08213"/></url>
<url><loc>https://scifaro.com/en/abs/omni-sparsity-dnn-fast-sparsity-optimization-for-on-device-streaming-e2e-asr-via-supernet-2110.08352</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omni-sparsity-dnn-fast-sparsity-optimization-for-on-device-streaming-e2e-asr-via-supernet-2110.08352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omni-sparsity-dnn-fast-sparsity-optimization-for-on-device-streaming-e2e-asr-via-supernet-2110.08352"/></url>
<url><loc>https://scifaro.com/en/abs/nn3a-neural-network-supported-acoustic-echo-cancellation-noise-suppression-and-automatic-gain-control-for-real-time-communications-2110.08437</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nn3a-neural-network-supported-acoustic-echo-cancellation-noise-suppression-and-automatic-gain-control-for-real-time-communications-2110.08437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nn3a-neural-network-supported-acoustic-echo-cancellation-noise-suppression-and-automatic-gain-control-for-real-time-communications-2110.08437"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-multichannel-speech-dereverberation-based-on-deep-neural-networks-2110.08439</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-multichannel-speech-dereverberation-based-on-deep-neural-networks-2110.08439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-multichannel-speech-dereverberation-based-on-deep-neural-networks-2110.08439"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-waveform-based-acoustic-models-2110.08634</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-waveform-based-acoustic-models-2110.08634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-waveform-based-acoustic-models-2110.08634"/></url>
<url><loc>https://scifaro.com/en/abs/improving-end-to-end-modeling-for-mispronunciation-detection-with-effective-augmentation-mechanisms-2110.08731</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-end-to-end-modeling-for-mispronunciation-detection-with-effective-augmentation-mechanisms-2110.08731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-end-to-end-modeling-for-mispronunciation-detection-with-effective-augmentation-mechanisms-2110.08731"/></url>
<url><loc>https://scifaro.com/en/abs/storage-and-authentication-of-audio-footage-for-ioaut-devices-using-distributed-ledger-technology-2110.08821</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/storage-and-authentication-of-audio-footage-for-ioaut-devices-using-distributed-ledger-technology-2110.08821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/storage-and-authentication-of-audio-footage-for-ioaut-devices-using-distributed-ledger-technology-2110.08821"/></url>
<url><loc>https://scifaro.com/en/abs/decar-deep-clustering-for-learning-general-purpose-audio-representations-2110.08895</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decar-deep-clustering-for-learning-general-purpose-audio-representations-2110.08895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decar-deep-clustering-for-learning-general-purpose-audio-representations-2110.08895"/></url>
<url><loc>https://scifaro.com/en/abs/ldnet-unified-listener-dependent-modeling-in-mos-prediction-for-synthetic-speech-2110.09103</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ldnet-unified-listener-dependent-modeling-in-mos-prediction-for-synthetic-speech-2110.09103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ldnet-unified-listener-dependent-modeling-in-mos-prediction-for-synthetic-speech-2110.09103"/></url>
<url><loc>https://scifaro.com/en/abs/real-additive-margin-softmax-for-speaker-verification-2110.09116</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-additive-margin-softmax-for-speaker-verification-2110.09116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-additive-margin-softmax-for-speaker-verification-2110.09116"/></url>
<url><loc>https://scifaro.com/en/abs/karatuner-towards-end-to-end-natural-pitch-correction-for-singing-voice-in-karaoke-2110.09121</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/karatuner-towards-end-to-end-natural-pitch-correction-for-singing-voice-in-karaoke-2110.09121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/karatuner-towards-end-to-end-natural-pitch-correction-for-singing-voice-in-karaoke-2110.09121"/></url>
<url><loc>https://scifaro.com/en/abs/spectnt-a-time-frequency-transformer-for-music-audio-2110.09127</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectnt-a-time-frequency-transformer-for-music-audio-2110.09127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectnt-a-time-frequency-transformer-for-music-audio-2110.09127"/></url>
<url><loc>https://scifaro.com/en/abs/learning-models-for-query-by-vocal-percussion-a-comparative-study-2110.09223</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-models-for-query-by-vocal-percussion-a-comparative-study-2110.09223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-models-for-query-by-vocal-percussion-a-comparative-study-2110.09223"/></url>
<url><loc>https://scifaro.com/en/abs/eihw-mtg-second-dicova-challenge-system-report-2110.09239</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eihw-mtg-second-dicova-challenge-system-report-2110.09239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eihw-mtg-second-dicova-challenge-system-report-2110.09239"/></url>
<url><loc>https://scifaro.com/en/abs/fmfcc-a-a-challenging-mandarin-dataset-for-synthetic-speech-detection-2110.09441</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fmfcc-a-a-challenging-mandarin-dataset-for-synthetic-speech-detection-2110.09441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fmfcc-a-a-challenging-mandarin-dataset-for-synthetic-speech-detection-2110.09441"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-domain-adaptation-with-paired-examples-for-acoustic-scene-classification-on-different-recording-devices-2110.09598</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-domain-adaptation-with-paired-examples-for-acoustic-scene-classification-on-different-recording-devices-2110.09598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-domain-adaptation-with-paired-examples-for-acoustic-scene-classification-on-different-recording-devices-2110.09598"/></url>
<url><loc>https://scifaro.com/en/abs/who-calls-the-shots-rethinking-few-shot-learning-for-audio-2110.09600</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-calls-the-shots-rethinking-few-shot-learning-for-audio-2110.09600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-calls-the-shots-rethinking-few-shot-learning-for-audio-2110.09600"/></url>
<url><loc>https://scifaro.com/en/abs/neural-synthesis-of-footsteps-sound-effects-with-generative-adversarial-networks-2110.09605</loc><lastmod>2021-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-synthesis-of-footsteps-sound-effects-with-generative-adversarial-networks-2110.09605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-synthesis-of-footsteps-sound-effects-with-generative-adversarial-networks-2110.09605"/></url>
<url><loc>https://scifaro.com/en/abs/neural-lexicon-reader-reduce-pronunciation-errors-in-end-to-end-tts-by-leveraging-external-textual-knowledge-2110.09698</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-lexicon-reader-reduce-pronunciation-errors-in-end-to-end-tts-by-leveraging-external-textual-knowledge-2110.09698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-lexicon-reader-reduce-pronunciation-errors-in-end-to-end-tts-by-leveraging-external-textual-knowledge-2110.09698"/></url>
<url><loc>https://scifaro.com/en/abs/rep-works-in-speaker-verification-2110.09720</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rep-works-in-speaker-verification-2110.09720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rep-works-in-speaker-verification-2110.09720"/></url>
<url><loc>https://scifaro.com/en/abs/improving-emotional-speech-synthesis-by-using-sus-constrained-vae-and-text-encoder-aggregation-2110.09780</loc><lastmod>2022-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-emotional-speech-synthesis-by-using-sus-constrained-vae-and-text-encoder-aggregation-2110.09780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-emotional-speech-synthesis-by-using-sus-constrained-vae-and-text-encoder-aggregation-2110.09780"/></url>
<url><loc>https://scifaro.com/en/abs/ssast-self-supervised-audio-spectrogram-transformer-2110.09784</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssast-self-supervised-audio-spectrogram-transformer-2110.09784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssast-self-supervised-audio-spectrogram-transformer-2110.09784"/></url>
<url><loc>https://scifaro.com/en/abs/speech-pattern-based-black-box-model-watermarking-for-automatic-speech-recognition-2110.09814</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-pattern-based-black-box-model-watermarking-for-automatic-speech-recognition-2110.09814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-pattern-based-black-box-model-watermarking-for-automatic-speech-recognition-2110.09814"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-separation-of-whale-vocalizations-from-background-oceanic-noise-using-a-power-calculation-2110.10010</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-separation-of-whale-vocalizations-from-background-oceanic-noise-using-a-power-calculation-2110.10010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-separation-of-whale-vocalizations-from-background-oceanic-noise-using-a-power-calculation-2110.10010"/></url>
<url><loc>https://scifaro.com/en/abs/continual-self-training-with-bootstrapped-remixing-for-speech-enhancement-2110.10103</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-self-training-with-bootstrapped-remixing-for-speech-enhancement-2110.10103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-self-training-with-bootstrapped-remixing-for-speech-enhancement-2110.10103"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-enhancing-ctc-model-for-triggered-attention-based-streaming-asr-2110.10402</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-enhancing-ctc-model-for-triggered-attention-based-streaming-asr-2110.10402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-enhancing-ctc-model-for-triggered-attention-based-streaming-asr-2110.10402"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-data-augmentation-in-voice-anti-spoofing-2110.10491</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-data-augmentation-in-voice-anti-spoofing-2110.10491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-data-augmentation-in-voice-anti-spoofing-2110.10491"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-learning-for-stabilizing-label-selection-in-speech-separation-with-mapping-based-method-2110.10593</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-learning-for-stabilizing-label-selection-in-speech-separation-with-mapping-based-method-2110.10593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-learning-for-stabilizing-label-selection-in-speech-separation-with-mapping-based-method-2110.10593"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-speech-separation-to-real-world-meetings-using-mixture-invariant-training-2110.10739</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-speech-separation-to-real-world-meetings-using-mixture-invariant-training-2110.10739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-speech-separation-to-real-world-meetings-using-mixture-invariant-training-2110.10739"/></url>
<url><loc>https://scifaro.com/en/abs/tparn-triple-path-attentive-recurrent-network-for-time-domain-multichannel-speech-enhancement-2110.10757</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tparn-triple-path-attentive-recurrent-network-for-time-domain-multichannel-speech-enhancement-2110.10757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tparn-triple-path-attentive-recurrent-network-for-time-domain-multichannel-speech-enhancement-2110.10757"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-multi-taper-features-for-deep-speaker-verification-2110.10983</loc><lastmod>2021-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-multi-taper-features-for-deep-speaker-verification-2110.10983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-multi-taper-features-for-deep-speaker-verification-2110.10983"/></url>
<url><loc>https://scifaro.com/en/abs/wav2clip-learning-robust-audio-representations-from-clip-2110.11499</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2clip-learning-robust-audio-representations-from-clip-2110.11499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2clip-learning-robust-audio-representations-from-clip-2110.11499"/></url>
<url><loc>https://scifaro.com/en/abs/signal-envelope-a-c-library-with-python-bindings-for-temporal-envelope-estimation-2110.11807</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-envelope-a-c-library-with-python-bindings-for-temporal-envelope-estimation-2110.11807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-envelope-a-c-library-with-python-bindings-for-temporal-envelope-estimation-2110.11807"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-ad-hoc-array-speech-enhancement-using-a-triple-path-network-2110.11844</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-ad-hoc-array-speech-enhancement-using-a-triple-path-network-2110.11844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-ad-hoc-array-speech-enhancement-using-a-triple-path-network-2110.11844"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-alignment-of-speech-and-language-latent-spaces-for-end-to-end-speech-recognition-and-understanding-2110.12138</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-alignment-of-speech-and-language-latent-spaces-for-end-to-end-speech-recognition-and-understanding-2110.12138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-alignment-of-speech-and-language-latent-spaces-for-end-to-end-speech-recognition-and-understanding-2110.12138"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-acoustic-space-for-an-efficient-sampling-in-neural-text-to-speech-2110.12539</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-acoustic-space-for-an-efficient-sampling-in-neural-text-to-speech-2110.12539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-acoustic-space-for-an-efficient-sampling-in-neural-text-to-speech-2110.12539"/></url>
<url><loc>https://scifaro.com/en/abs/lhotse-a-speech-data-representation-library-for-the-modern-deep-learning-ecosystem-2110.12561</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lhotse-a-speech-data-representation-library-for-the-modern-deep-learning-ecosystem-2110.12561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lhotse-a-speech-data-representation-library-for-the-modern-deep-learning-ecosystem-2110.12561"/></url>
<url><loc>https://scifaro.com/en/abs/delightfultts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2021-2110.12612</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delightfultts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2021-2110.12612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delightfultts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2021-2110.12612"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-for-audio-based-navigation-and-audio-source-localization-in-multi-speaker-environments-2110.12778</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-for-audio-based-navigation-and-audio-source-localization-in-multi-speaker-environments-2110.12778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-reinforcement-learning-approach-for-audio-based-navigation-and-audio-source-localization-in-multi-speaker-environments-2110.12778"/></url>
<url><loc>https://scifaro.com/en/abs/actions-speak-louder-than-listening-evaluating-music-style-transfer-based-on-editing-experience-2110.12855</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/actions-speak-louder-than-listening-evaluating-music-style-transfer-based-on-editing-experience-2110.12855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/actions-speak-louder-than-listening-evaluating-music-style-transfer-based-on-editing-experience-2110.12855"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-source-separation-by-steering-pretrained-music-models-2110.13071</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-source-separation-by-steering-pretrained-music-models-2110.13071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-source-separation-by-steering-pretrained-music-models-2110.13071"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-speech-enhancement-without-beamforming-2110.13130</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-speech-enhancement-without-beamforming-2110.13130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-speech-enhancement-without-beamforming-2110.13130"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-tools-for-audacity-helping-researchers-expand-the-artist-s-toolkit-2110.13323</loc><lastmod>2021-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-tools-for-audacity-helping-researchers-expand-the-artist-s-toolkit-2110.13323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-tools-for-audacity-helping-researchers-expand-the-artist-s-toolkit-2110.13323"/></url>
<url><loc>https://scifaro.com/en/abs/cs-rep-making-speaker-verification-networks-embracing-re-parameterization-2110.13465</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cs-rep-making-speaker-verification-networks-embracing-re-parameterization-2110.13465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cs-rep-making-speaker-verification-networks-embracing-re-parameterization-2110.13465"/></url>
<url><loc>https://scifaro.com/en/abs/aqp-an-open-modular-python-platform-for-objective-speech-and-audio-quality-metrics-2110.13589</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aqp-an-open-modular-python-platform-for-objective-speech-and-audio-quality-metrics-2110.13589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aqp-an-open-modular-python-platform-for-objective-speech-and-audio-quality-metrics-2110.13589"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-knowledge-distillation-for-on-device-audio-classification-2110.14131</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-knowledge-distillation-for-on-device-audio-classification-2110.14131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-knowledge-distillation-for-on-device-audio-classification-2110.14131"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-voice-conversion-via-self-supervised-prosody-representation-learning-2110.14422</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-via-self-supervised-prosody-representation-learning-2110.14422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-via-self-supervised-prosody-representation-learning-2110.14422"/></url>
<url><loc>https://scifaro.com/en/abs/generalizing-auc-optimization-to-multiclass-classification-for-audio-segmentation-with-limited-training-data-2110.14425</loc><lastmod>2021-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalizing-auc-optimization-to-multiclass-classification-for-audio-segmentation-with-limited-training-data-2110.14425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalizing-auc-optimization-to-multiclass-classification-for-audio-segmentation-with-limited-training-data-2110.14425"/></url>
<url><loc>https://scifaro.com/en/abs/nonnegative-tucker-decomposition-with-beta-divergence-for-music-structure-analysis-of-audio-signals-2110.14434</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonnegative-tucker-decomposition-with-beta-divergence-for-music-structure-analysis-of-audio-signals-2110.14434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonnegative-tucker-decomposition-with-beta-divergence-for-music-structure-analysis-of-audio-signals-2110.14434"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-single-song-autoencoding-schemes-for-audio-based-music-structure-analysis-2110.14437</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-single-song-autoencoding-schemes-for-audio-based-music-structure-analysis-2110.14437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-single-song-autoencoding-schemes-for-audio-based-music-structure-analysis-2110.14437"/></url>
<url><loc>https://scifaro.com/en/abs/neural-analysis-and-synthesis-reconstructing-speech-from-self-supervised-representations-2110.14513</loc><lastmod>2021-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-analysis-and-synthesis-reconstructing-speech-from-self-supervised-representations-2110.14513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-analysis-and-synthesis-reconstructing-speech-from-self-supervised-representations-2110.14513"/></url>
<url><loc>https://scifaro.com/en/abs/vrm-phase-i-vkw-system-description-of-long-short-video-customizable-keyword-wakeup-challenge-2110.15316</loc><lastmod>2021-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vrm-phase-i-vkw-system-description-of-long-short-video-customizable-keyword-wakeup-challenge-2110.15316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vrm-phase-i-vkw-system-description-of-long-short-video-customizable-keyword-wakeup-challenge-2110.15316"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noise-robustness-of-contrastive-speech-representation-learning-with-speech-reconstruction-2110.15430</loc><lastmod>2021-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noise-robustness-of-contrastive-speech-representation-learning-with-speech-reconstruction-2110.15430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noise-robustness-of-contrastive-speech-representation-learning-with-speech-reconstruction-2110.15430"/></url>
<url><loc>https://scifaro.com/en/abs/decision-attentive-regularization-to-improve-simultaneous-speech-translation-systems-2110.15729</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decision-attentive-regularization-to-improve-simultaneous-speech-translation-systems-2110.15729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decision-attentive-regularization-to-improve-simultaneous-speech-translation-systems-2110.15729"/></url>
<url><loc>https://scifaro.com/en/abs/vrain-upv-mllp-s-system-for-the-blizzard-challenge-2021-2110.15792</loc><lastmod>2021-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vrain-upv-mllp-s-system-for-the-blizzard-challenge-2021-2110.15792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vrain-upv-mllp-s-system-for-the-blizzard-challenge-2021-2110.15792"/></url>
<url><loc>https://scifaro.com/en/abs/learning-continuous-representation-of-audio-for-arbitrary-scale-super-resolution-2111.00195</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-continuous-representation-of-audio-for-arbitrary-scale-super-resolution-2111.00195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-continuous-representation-of-audio-for-arbitrary-scale-super-resolution-2111.00195"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-quaternion-convolutional-neural-networks-2111.00404</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-quaternion-convolutional-neural-networks-2111.00404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-quaternion-convolutional-neural-networks-2111.00404"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-north-indian-classical-ragas-using-tonnetz-2111.00436</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-north-indian-classical-ragas-using-tonnetz-2111.00436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-north-indian-classical-ragas-using-tonnetz-2111.00436"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-1d-state-space-for-efficient-music-rhythmic-analysis-2111.00704</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-1d-state-space-for-efficient-music-rhythmic-analysis-2111.00704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-1d-state-space-for-efficient-music-rhythmic-analysis-2111.00704"/></url>
<url><loc>https://scifaro.com/en/abs/a-mathematical-model-of-the-vowel-space-2111.00868</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mathematical-model-of-the-vowel-space-2111.00868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mathematical-model-of-the-vowel-space-2111.00868"/></url>
<url><loc>https://scifaro.com/en/abs/refinegan-universally-generating-waveform-better-than-ground-truth-with-highly-accurate-pitch-and-intensity-responses-2111.00962</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refinegan-universally-generating-waveform-better-than-ground-truth-with-highly-accurate-pitch-and-intensity-responses-2111.00962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refinegan-universally-generating-waveform-better-than-ground-truth-with-highly-accurate-pitch-and-intensity-responses-2111.00962"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-robustness-of-you-only-hear-once-yoho-algorithm-on-noisy-audios-in-the-voice-dataset-2111.01205</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-robustness-of-you-only-hear-once-yoho-algorithm-on-noisy-audios-in-the-voice-dataset-2111.01205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-robustness-of-you-only-hear-once-yoho-algorithm-on-noisy-audios-in-the-voice-dataset-2111.01205"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-generate-piano-music-with-sustain-pedals-2111.01216</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-generate-piano-music-with-sustain-pedals-2111.01216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-generate-piano-music-with-sustain-pedals-2111.01216"/></url>
<url><loc>https://scifaro.com/en/abs/attention-guided-generative-adversarial-network-for-whisper-to-normal-speech-conversion-2111.01342</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-guided-generative-adversarial-network-for-whisper-to-normal-speech-conversion-2111.01342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-guided-generative-adversarial-network-for-whisper-to-normal-speech-conversion-2111.01342"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-with-dual-adversarial-loss-for-bone-conducted-speech-enhancement-2111.01430</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-with-dual-adversarial-loss-for-bone-conducted-speech-enhancement-2111.01430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-with-dual-adversarial-loss-for-bone-conducted-speech-enhancement-2111.01430"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-speech-from-intracranial-depth-electrodes-using-an-encoder-decoder-framework-2111.01457</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-speech-from-intracranial-depth-electrodes-using-an-encoder-decoder-framework-2111.01457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-speech-from-intracranial-depth-electrodes-using-an-encoder-decoder-framework-2111.01457"/></url>
<url><loc>https://scifaro.com/en/abs/a-strongly-labelled-polyphonic-dataset-of-urban-sounds-with-spatiotemporal-context-2111.02006</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-strongly-labelled-polyphonic-dataset-of-urban-sounds-with-spatiotemporal-context-2111.02006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-strongly-labelled-polyphonic-dataset-of-urban-sounds-with-spatiotemporal-context-2111.02006"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-speaker-role-identification-in-air-traffic-communication-using-deep-learning-approaches-2111.02041</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-speaker-role-identification-in-air-traffic-communication-using-deep-learning-approaches-2111.02041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-speaker-role-identification-in-air-traffic-communication-using-deep-learning-approaches-2111.02041"/></url>
<url><loc>https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-nist-sre-2021-2111.02298</loc><lastmod>2021-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-nist-sre-2021-2111.02298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stc-speaker-recognition-systems-for-the-nist-sre-2021-2111.02298"/></url>
<url><loc>https://scifaro.com/en/abs/weight-block-or-unit-exploring-sparsity-tradeoffs-for-speech-enhancement-on-tiny-neural-accelerators-2111.02351</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weight-block-or-unit-exploring-sparsity-tradeoffs-for-speech-enhancement-on-tiny-neural-accelerators-2111.02351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weight-block-or-unit-exploring-sparsity-tradeoffs-for-speech-enhancement-on-tiny-neural-accelerators-2111.02351"/></url>
<url><loc>https://scifaro.com/en/abs/inqss-a-speech-intelligibility-and-quality-assessment-model-using-a-multi-task-learning-network-2111.02585</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inqss-a-speech-intelligibility-and-quality-assessment-model-using-a-multi-task-learning-network-2111.02585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inqss-a-speech-intelligibility-and-quality-assessment-model-using-a-multi-task-learning-network-2111.02585"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-for-air-traffic-control-via-feature-learning-and-end-to-end-training-2111.02654</loc><lastmod>2021-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-for-air-traffic-control-via-feature-learning-and-end-to-end-training-2111.02654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-for-air-traffic-control-via-feature-learning-and-end-to-end-training-2111.02654"/></url>
<url><loc>https://scifaro.com/en/abs/mt3-multi-task-multitrack-music-transcription-2111.03017</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mt3-multi-task-multitrack-music-transcription-2111.03017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mt3-multi-task-multitrack-music-transcription-2111.03017"/></url>
<url><loc>https://scifaro.com/en/abs/objective-measurement-of-pitch-extractors-responses-to-frequency-modulated-sounds-and-two-reference-pitch-extraction-methods-for-analyzing-voice-pitch-responses-to-auditory-stimulation-2111.03629</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-measurement-of-pitch-extractors-responses-to-frequency-modulated-sounds-and-two-reference-pitch-extraction-methods-for-analyzing-voice-pitch-responses-to-auditory-stimulation-2111.03629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-measurement-of-pitch-extractors-responses-to-frequency-modulated-sounds-and-two-reference-pitch-extraction-methods-for-analyzing-voice-pitch-responses-to-auditory-stimulation-2111.03629"/></url>
<url><loc>https://scifaro.com/en/abs/sig-vc-a-speaker-information-guided-zero-shot-voice-conversion-system-for-both-human-beings-and-machines-2111.03811</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sig-vc-a-speaker-information-guided-zero-shot-voice-conversion-system-for-both-human-beings-and-machines-2111.03811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sig-vc-a-speaker-information-guided-zero-shot-voice-conversion-system-for-both-human-beings-and-machines-2111.03811"/></url>
<url><loc>https://scifaro.com/en/abs/digital-audio-processing-tools-for-music-corpus-studies-2111.03895</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-audio-processing-tools-for-music-corpus-studies-2111.03895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-audio-processing-tools-for-music-corpus-studies-2111.03895"/></url>
<url><loc>https://scifaro.com/en/abs/towards-noise-robust-trigger-word-detection-with-contrastive-learning-pre-task-for-fast-on-boarding-of-new-trigger-words-2111.03971</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-noise-robust-trigger-word-detection-with-contrastive-learning-pre-task-for-fast-on-boarding-of-new-trigger-words-2111.03971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-noise-robust-trigger-word-detection-with-contrastive-learning-pre-task-for-fast-on-boarding-of-new-trigger-words-2111.03971"/></url>
<url><loc>https://scifaro.com/en/abs/meta-tts-meta-learning-for-few-shot-speaker-adaptive-text-to-speech-2111.04040</loc><lastmod>2022-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-tts-meta-learning-for-few-shot-speaker-adaptive-text-to-speech-2111.04040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-tts-meta-learning-for-few-shot-speaker-adaptive-text-to-speech-2111.04040"/></url>
<url><loc>https://scifaro.com/en/abs/theme-transformer-symbolic-music-generation-with-theme-conditioned-transformer-2111.04093</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theme-transformer-symbolic-music-generation-with-theme-conditioned-transformer-2111.04093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theme-transformer-symbolic-music-generation-with-theme-conditioned-transformer-2111.04093"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-the-adversarial-vulnerability-of-speech-self-supervised-learning-2111.04330</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-the-adversarial-vulnerability-of-speech-self-supervised-learning-2111.04330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-the-adversarial-vulnerability-of-speech-self-supervised-learning-2111.04330"/></url>
<url><loc>https://scifaro.com/en/abs/seofp-net-compression-and-acceleration-of-deep-neural-networks-for-speech-enhancement-using-sign-exponent-only-floating-points-2111.04436</loc><lastmod>2021-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seofp-net-compression-and-acceleration-of-deep-neural-networks-for-speech-enhancement-using-sign-exponent-only-floating-points-2111.04436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seofp-net-compression-and-acceleration-of-deep-neural-networks-for-speech-enhancement-using-sign-exponent-only-floating-points-2111.04436"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-low-power-keyword-spotting-at-the-edge-2111.04988</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-low-power-keyword-spotting-at-the-edge-2111.04988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-low-power-keyword-spotting-at-the-edge-2111.04988"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-generation-2111.05095</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-generation-2111.05095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-generation-2111.05095"/></url>
<url><loc>https://scifaro.com/en/abs/caesynth-real-time-timbre-interpolation-and-pitch-control-with-conditional-autoencoders-2111.05174</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/caesynth-real-time-timbre-interpolation-and-pitch-control-with-conditional-autoencoders-2111.05174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/caesynth-real-time-timbre-interpolation-and-pitch-control-with-conditional-autoencoders-2111.05174"/></url>
<url><loc>https://scifaro.com/en/abs/inclusive-speaker-verification-with-adaptive-thresholding-2111.05501</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inclusive-speaker-verification-with-adaptive-thresholding-2111.05501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inclusive-speaker-verification-with-adaptive-thresholding-2111.05501"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-chamberlin-digital-state-variable-filter-2111.05592</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-chamberlin-digital-state-variable-filter-2111.05592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-chamberlin-digital-state-variable-filter-2111.05592"/></url>
<url><loc>https://scifaro.com/en/abs/structure-from-silence-learning-scene-structure-from-ambient-sound-2111.05846</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structure-from-silence-learning-scene-structure-from-ambient-sound-2111.05846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structure-from-silence-learning-scene-structure-from-ambient-sound-2111.05846"/></url>
<url><loc>https://scifaro.com/en/abs/a-generic-deep-learning-based-cough-analysis-system-from-clinically-validated-samples-for-point-of-need-covid-19-test-and-severity-levels-2111.05895</loc><lastmod>2021-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generic-deep-learning-based-cough-analysis-system-from-clinically-validated-samples-for-point-of-need-covid-19-test-and-severity-levels-2111.05895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generic-deep-learning-based-cough-analysis-system-from-clinically-validated-samples-for-point-of-need-covid-19-test-and-severity-levels-2111.05895"/></url>
<url><loc>https://scifaro.com/en/abs/music-score-expansion-with-variable-length-infilling-2111.06046</loc><lastmod>2021-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-score-expansion-with-variable-length-infilling-2111.06046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-score-expansion-with-variable-length-infilling-2111.06046"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-noise-adaptive-speech-enhancement-by-discriminator-constrained-optimal-transport-2111.06316</loc><lastmod>2021-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-noise-adaptive-speech-enhancement-by-discriminator-constrained-optimal-transport-2111.06316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-noise-adaptive-speech-enhancement-by-discriminator-constrained-optimal-transport-2111.06316"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-efficient-voice-identification-using-wav2vec2-0-and-hubert-based-on-the-quran-reciters-dataset-2111.06331</loc><lastmod>2021-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-efficient-voice-identification-using-wav2vec2-0-and-hubert-based-on-the-quran-reciters-dataset-2111.06331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-efficient-voice-identification-using-wav2vec2-0-and-hubert-based-on-the-quran-reciters-dataset-2111.06331"/></url>
<url><loc>https://scifaro.com/en/abs/domain-generalization-on-efficient-acoustic-scene-classification-using-residual-normalization-2111.06531</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-generalization-on-efficient-acoustic-scene-classification-using-residual-normalization-2111.06531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-generalization-on-efficient-acoustic-scene-classification-using-residual-normalization-2111.06531"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-neural-network-based-approach-to-recognize-bangla-spoken-digits-from-speech-signal-2111.06625</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-neural-network-based-approach-to-recognize-bangla-spoken-digits-from-speech-signal-2111.06625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-neural-network-based-approach-to-recognize-bangla-spoken-digits-from-speech-signal-2111.06625"/></url>
<url><loc>https://scifaro.com/en/abs/fully-automatic-page-turning-on-real-scores-2111.06643</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-automatic-page-turning-on-real-scores-2111.06643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-automatic-page-turning-on-real-scores-2111.06643"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-deep-sparse-auto-encoder-extreme-learning-machine-with-a-new-weighting-scheme-and-spectro-temporal-features-along-with-classical-feature-selection-and-a-new-quantum-inspired-dimension-reduction-method-2111.07094</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-deep-sparse-auto-encoder-extreme-learning-machine-with-a-new-weighting-scheme-and-spectro-temporal-features-along-with-classical-feature-selection-and-a-new-quantum-inspired-dimension-reduction-method-2111.07094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-deep-sparse-auto-encoder-extreme-learning-machine-with-a-new-weighting-scheme-and-spectro-temporal-features-along-with-classical-feature-selection-and-a-new-quantum-inspired-dimension-reduction-method-2111.07094"/></url>
<url><loc>https://scifaro.com/en/abs/direct-noisy-speech-modeling-for-noisy-to-noisy-voice-conversion-2111.07116</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direct-noisy-speech-modeling-for-noisy-to-noisy-voice-conversion-2111.07116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direct-noisy-speech-modeling-for-noisy-to-noisy-voice-conversion-2111.07116"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-system-by-quaternion-nonlinear-echo-state-network-2111.07234</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-system-by-quaternion-nonlinear-echo-state-network-2111.07234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-system-by-quaternion-nonlinear-echo-state-network-2111.07234"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-attention-for-monaural-speech-enhancement-2111.07518</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-attention-for-monaural-speech-enhancement-2111.07518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-attention-for-monaural-speech-enhancement-2111.07518"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-loop-generation-with-vq-vae-2111.07657</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-loop-generation-with-vq-vae-2111.07657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-loop-generation-with-vq-vae-2111.07657"/></url>
<url><loc>https://scifaro.com/en/abs/metric-based-multimodal-meta-learning-for-human-movement-identification-via-footstep-recognition-2111.07979</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-based-multimodal-meta-learning-for-human-movement-identification-via-footstep-recognition-2111.07979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-based-multimodal-meta-learning-for-human-movement-identification-via-footstep-recognition-2111.07979"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploratory-study-on-perceptual-spaces-of-the-singing-voice-2111.08196</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploratory-study-on-perceptual-spaces-of-the-singing-voice-2111.08196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploratory-study-on-perceptual-spaces-of-the-singing-voice-2111.08196"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-acoustic-reflectors-using-a-robot-s-ego-noise-2111.08327</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-acoustic-reflectors-using-a-robot-s-ego-noise-2111.08327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-acoustic-reflectors-using-a-robot-s-ego-noise-2111.08327"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lightweight-controllable-audio-synthesis-with-conditional-implicit-neural-representations-2111.08462</loc><lastmod>2021-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lightweight-controllable-audio-synthesis-with-conditional-implicit-neural-representations-2111.08462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lightweight-controllable-audio-synthesis-with-conditional-implicit-neural-representations-2111.08462"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-singing-technique-conversion-2111.08839</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-singing-technique-conversion-2111.08839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-singing-technique-conversion-2111.08839"/></url>
<url><loc>https://scifaro.com/en/abs/information-fusion-in-attention-networks-using-adaptive-and-multi-level-factorized-bilinear-pooling-for-audio-visual-emotion-recognition-2111.08910</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/information-fusion-in-attention-networks-using-adaptive-and-multi-level-factorized-bilinear-pooling-for-audio-visual-emotion-recognition-2111.08910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/information-fusion-in-attention-networks-using-adaptive-and-multi-level-factorized-bilinear-pooling-for-audio-visual-emotion-recognition-2111.08910"/></url>
<url><loc>https://scifaro.com/en/abs/subject-enveloped-deep-sample-fuzzy-ensemble-learning-algorithm-of-parkinson-s-speech-data-2111.09014</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subject-enveloped-deep-sample-fuzzy-ensemble-learning-algorithm-of-parkinson-s-speech-data-2111.09014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subject-enveloped-deep-sample-fuzzy-ensemble-learning-algorithm-of-parkinson-s-speech-data-2111.09014"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-streaming-speech-synthesis-with-low-sentence-length-independent-latency-2111.09052</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-streaming-speech-synthesis-with-low-sentence-length-independent-latency-2111.09052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-streaming-speech-synthesis-with-low-sentence-length-independent-latency-2111.09052"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-low-resource-speaker-adaptation-using-phonological-features-2111.09075</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-low-resource-speaker-adaptation-using-phonological-features-2111.09075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-low-resource-speaker-adaptation-using-phonological-features-2111.09075"/></url>
<url><loc>https://scifaro.com/en/abs/rapping-singing-voice-synthesis-based-on-phoneme-level-prosody-control-2111.09146</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rapping-singing-voice-synthesis-based-on-phoneme-level-prosody-control-2111.09146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rapping-singing-voice-synthesis-based-on-phoneme-level-prosody-control-2111.09146"/></url>
<url><loc>https://scifaro.com/en/abs/towards-intelligibility-oriented-audio-visual-speech-enhancement-2111.09642</loc><lastmod>2021-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-intelligibility-oriented-audio-visual-speech-enhancement-2111.09642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-intelligibility-oriented-audio-visual-speech-enhancement-2111.09642"/></url>
<url><loc>https://scifaro.com/en/abs/dawdreamer-bridging-the-gap-between-digital-audio-workstations-and-python-interfaces-2111.09931</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dawdreamer-bridging-the-gap-between-digital-audio-workstations-and-python-interfaces-2111.09931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dawdreamer-bridging-the-gap-between-digital-audio-workstations-and-python-interfaces-2111.09931"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-wavetable-synthesis-2111.10003</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-wavetable-synthesis-2111.10003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-wavetable-synthesis-2111.10003"/></url>
<url><loc>https://scifaro.com/en/abs/improved-prosodic-clustering-for-multispeaker-and-speaker-independent-phoneme-level-prosody-control-2111.10168</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-prosodic-clustering-for-multispeaker-and-speaker-independent-phoneme-level-prosody-control-2111.10168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-prosodic-clustering-for-multispeaker-and-speaker-independent-phoneme-level-prosody-control-2111.10168"/></url>
<url><loc>https://scifaro.com/en/abs/word-level-style-control-for-expressive-non-attentive-speech-synthesis-2111.10173</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/word-level-style-control-for-expressive-non-attentive-speech-synthesis-2111.10173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/word-level-style-control-for-expressive-non-attentive-speech-synthesis-2111.10173"/></url>
<url><loc>https://scifaro.com/en/abs/prosodic-clustering-for-phoneme-level-prosody-control-in-end-to-end-speech-synthesis-2111.10177</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosodic-clustering-for-phoneme-level-prosody-control-in-end-to-end-speech-synthesis-2111.10177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosodic-clustering-for-phoneme-level-prosody-control-in-end-to-end-speech-synthesis-2111.10177"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-deep-urban-sound-classification-using-layer-wise-relevance-propagation-2111.10235</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-deep-urban-sound-classification-using-layer-wise-relevance-propagation-2111.10235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-deep-urban-sound-classification-using-layer-wise-relevance-propagation-2111.10235"/></url>
<url><loc>https://scifaro.com/en/abs/deep-spoken-keyword-spotting-an-overview-2111.10592</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-spoken-keyword-spotting-an-overview-2111.10592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-spoken-keyword-spotting-an-overview-2111.10592"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-acoustic-echo-cancellation-for-keyword-spotting-and-device-directed-speech-detection-2111.10639</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-acoustic-echo-cancellation-for-keyword-spotting-and-device-directed-speech-detection-2111.10639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-acoustic-echo-cancellation-for-keyword-spotting-and-device-directed-speech-detection-2111.10639"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-depression-from-stratified-samples-of-audio-data-2111.10783</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-depression-from-stratified-samples-of-audio-data-2111.10783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-depression-from-stratified-samples-of-audio-data-2111.10783"/></url>
<url><loc>https://scifaro.com/en/abs/health-monitoring-of-industrial-machines-using-scene-aware-threshold-selection-2111.10897</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/health-monitoring-of-industrial-machines-using-scene-aware-threshold-selection-2111.10897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/health-monitoring-of-industrial-machines-using-scene-aware-threshold-selection-2111.10897"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-3d-spatial-feature-2111.11023</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-3d-spatial-feature-2111.11023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-3d-spatial-feature-2111.11023"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-the-accuracy-of-deep-neural-networks-dnn-and-convolutional-neural-network-cnn-in-music-genre-recognition-mgr-experiments-on-kurdish-music-2111.11063</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-the-accuracy-of-deep-neural-networks-dnn-and-convolutional-neural-network-cnn-in-music-genre-recognition-mgr-experiments-on-kurdish-music-2111.11063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-the-accuracy-of-deep-neural-networks-dnn-and-convolutional-neural-network-cnn-in-music-genre-recognition-mgr-experiments-on-kurdish-music-2111.11063"/></url>
<url><loc>https://scifaro.com/en/abs/music-classification-beyond-supervised-learning-towards-real-world-applications-2111.11636</loc><lastmod>2021-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-classification-beyond-supervised-learning-towards-real-world-applications-2111.11636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-classification-beyond-supervised-learning-towards-real-world-applications-2111.11636"/></url>
<url><loc>https://scifaro.com/en/abs/adtof-a-large-dataset-of-non-synthetic-music-for-automatic-drum-transcription-2111.11737</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adtof-a-large-dataset-of-non-synthetic-music-for-automatic-drum-transcription-2111.11737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adtof-a-large-dataset-of-non-synthetic-music-for-automatic-drum-transcription-2111.11737"/></url>
<url><loc>https://scifaro.com/en/abs/guided-tts-a-diffusion-model-for-text-to-speech-via-classifier-guidance-2111.11755</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-tts-a-diffusion-model-for-text-to-speech-via-classifier-guidance-2111.11755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-tts-a-diffusion-model-for-text-to-speech-via-classifier-guidance-2111.11755"/></url>
<url><loc>https://scifaro.com/en/abs/upsampling-layers-for-music-source-separation-2111.11773</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/upsampling-layers-for-music-source-separation-2111.11773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/upsampling-layers-for-music-source-separation-2111.11773"/></url>
<url><loc>https://scifaro.com/en/abs/longitudinal-speech-biomarkers-for-automated-alzheimer-s-detection-2111.11859</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/longitudinal-speech-biomarkers-for-automated-alzheimer-s-detection-2111.11859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/longitudinal-speech-biomarkers-for-automated-alzheimer-s-detection-2111.11859"/></url>
<url><loc>https://scifaro.com/en/abs/towards-learning-universal-audio-representations-2111.12124</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-learning-universal-audio-representations-2111.12124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-learning-universal-audio-representations-2111.12124"/></url>
<url><loc>https://scifaro.com/en/abs/how-speech-is-recognized-to-be-emotional-a-study-based-on-information-decomposition-2111.12324</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-speech-is-recognized-to-be-emotional-a-study-based-on-information-decomposition-2111.12324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-speech-is-recognized-to-be-emotional-a-study-based-on-information-decomposition-2111.12324"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-decoupled-probabilistic-linear-discriminant-analysis-2111.12326</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-decoupled-probabilistic-linear-discriminant-analysis-2111.12326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-decoupled-probabilistic-linear-discriminant-analysis-2111.12326"/></url>
<url><loc>https://scifaro.com/en/abs/an-map-estimation-for-between-class-variance-2111.12331</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-map-estimation-for-between-class-variance-2111.12331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-map-estimation-for-between-class-variance-2111.12331"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-binaural-speech-intelligibility-prediction-from-discrete-latent-representations-2111.12531</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-binaural-speech-intelligibility-prediction-from-discrete-latent-representations-2111.12531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-binaural-speech-intelligibility-prediction-from-discrete-latent-representations-2111.12531"/></url>
<url><loc>https://scifaro.com/en/abs/towards-cross-cultural-analysis-using-music-information-dynamics-2111.12588</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-cross-cultural-analysis-using-music-information-dynamics-2111.12588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-cross-cultural-analysis-using-music-information-dynamics-2111.12588"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-audio-classification-with-partially-labeled-data-2111.12761</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-audio-classification-with-partially-labeled-data-2111.12761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-audio-classification-with-partially-labeled-data-2111.12761"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-detection-using-capsule-neural-network-on-multi-type-multi-scale-time-frequency-representation-2111.12869</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-using-capsule-neural-network-on-multi-type-multi-scale-time-frequency-representation-2111.12869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-using-capsule-neural-network-on-multi-type-multi-scale-time-frequency-representation-2111.12869"/></url>
<url><loc>https://scifaro.com/en/abs/a-muze-net-music-generation-by-composing-the-harmony-based-on-the-generated-melody-2111.12986</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-muze-net-music-generation-by-composing-the-harmony-based-on-the-generated-melody-2111.12986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-muze-net-music-generation-by-composing-the-harmony-based-on-the-generated-melody-2111.12986"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-music-tagging-transformer-2111.13457</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-music-tagging-transformer-2111.13457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-music-tagging-transformer-2111.13457"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-for-flexible-number-of-speakers-with-textual-information-2111.13694</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-for-flexible-number-of-speakers-with-textual-information-2111.13694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-for-flexible-number-of-speakers-with-textual-information-2111.13694"/></url>
<url><loc>https://scifaro.com/en/abs/how-deep-are-the-fakes-focusing-on-audio-deepfake-a-survey-2111.14203</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-deep-are-the-fakes-focusing-on-audio-deepfake-a-survey-2111.14203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-deep-are-the-fakes-focusing-on-audio-deepfake-a-survey-2111.14203"/></url>
<url><loc>https://scifaro.com/en/abs/responding-to-challenge-call-of-machine-learning-model-development-in-diagnosing-respiratory-disease-sounds-2111.14354</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/responding-to-challenge-call-of-machine-learning-model-development-in-diagnosing-respiratory-disease-sounds-2111.14354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/responding-to-challenge-call-of-machine-learning-model-development-in-diagnosing-respiratory-disease-sounds-2111.14354"/></url>
<url><loc>https://scifaro.com/en/abs/mixed-precision-dnn-qunatization-for-overlapped-speech-separation-and-recognition-2111.14479</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixed-precision-dnn-qunatization-for-overlapped-speech-separation-and-recognition-2111.14479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixed-precision-dnn-qunatization-for-overlapped-speech-separation-and-recognition-2111.14479"/></url>
<url><loc>https://scifaro.com/en/abs/catch-me-if-you-hear-me-audio-visual-navigation-in-complex-unmapped-environments-with-moving-sounds-2111.14843</loc><lastmod>2023-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catch-me-if-you-hear-me-audio-visual-navigation-in-complex-unmapped-environments-with-moving-sounds-2111.14843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catch-me-if-you-hear-me-audio-visual-navigation-in-complex-unmapped-environments-with-moving-sounds-2111.14843"/></url>
<url><loc>https://scifaro.com/en/abs/cycletransgan-evc-a-cyclegan-based-emotional-voice-conversion-model-with-transformer-2111.15159</loc><lastmod>2021-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycletransgan-evc-a-cyclegan-based-emotional-voice-conversion-model-with-transformer-2111.15159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycletransgan-evc-a-cyclegan-based-emotional-voice-conversion-model-with-transformer-2111.15159"/></url>
<url><loc>https://scifaro.com/en/abs/sp-sedt-self-supervised-pre-training-for-sound-event-detection-transformer-2111.15222</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sp-sedt-self-supervised-pre-training-for-sound-event-detection-transformer-2111.15222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sp-sedt-self-supervised-pre-training-for-sound-event-detection-transformer-2111.15222"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-extraction-using-onomatopoeic-words-2112.00209</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-extraction-using-onomatopoeic-words-2112.00209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-extraction-using-onomatopoeic-words-2112.00209"/></url>
<url><loc>https://scifaro.com/en/abs/score-transformer-generating-musical-score-from-note-level-representation-2112.00355</loc><lastmod>2021-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-transformer-generating-musical-score-from-note-level-representation-2112.00355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-transformer-generating-musical-score-from-note-level-representation-2112.00355"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-music-emotion-recognition-using-noisy-student-training-and-harmonic-pitch-class-profiles-2112.00702</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-music-emotion-recognition-using-noisy-student-training-and-harmonic-pitch-class-profiles-2112.00702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-music-emotion-recognition-using-noisy-student-training-and-harmonic-pitch-class-profiles-2112.00702"/></url>
<url><loc>https://scifaro.com/en/abs/music-to-dance-generation-with-optimal-transport-2112.01806</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-to-dance-generation-with-optimal-transport-2112.01806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-to-dance-generation-with-optimal-transport-2112.01806"/></url>
<url><loc>https://scifaro.com/en/abs/catch-me-if-you-can-blackbox-adversarial-attacks-on-automatic-speech-recognition-using-frequency-masking-2112.01821</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catch-me-if-you-can-blackbox-adversarial-attacks-on-automatic-speech-recognition-using-frequency-masking-2112.01821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catch-me-if-you-can-blackbox-adversarial-attacks-on-automatic-speech-recognition-using-frequency-masking-2112.01821"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-using-an-asynchronous-fully-recurrent-convolutional-neural-network-2112.02321</loc><lastmod>2021-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-using-an-asynchronous-fully-recurrent-convolutional-neural-network-2112.02321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-using-an-asynchronous-fully-recurrent-convolutional-neural-network-2112.02321"/></url>
<url><loc>https://scifaro.com/en/abs/yourtts-towards-zero-shot-multi-speaker-tts-and-zero-shot-voice-conversion-for-everyone-2112.02418</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yourtts-towards-zero-shot-multi-speaker-tts-and-zero-shot-voice-conversion-for-everyone-2112.02418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yourtts-towards-zero-shot-multi-speaker-tts-and-zero-shot-voice-conversion-for-everyone-2112.02418"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-deep-hierarchical-variational-autoencoder-for-voice-conversion-2112.02796</loc><lastmod>2021-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-deep-hierarchical-variational-autoencoder-for-voice-conversion-2112.02796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-deep-hierarchical-variational-autoencoder-for-voice-conversion-2112.02796"/></url>
<url><loc>https://scifaro.com/en/abs/vocbench-a-neural-vocoder-benchmark-for-speech-synthesis-2112.03099</loc><lastmod>2021-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocbench-a-neural-vocoder-benchmark-for-speech-synthesis-2112.03099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocbench-a-neural-vocoder-benchmark-for-speech-synthesis-2112.03099"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deepfake-perceptions-in-college-going-populations-2112.03351</loc><lastmod>2021-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deepfake-perceptions-in-college-going-populations-2112.03351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deepfake-perceptions-in-college-going-populations-2112.03351"/></url>
<url><loc>https://scifaro.com/en/abs/learning-music-audio-representations-via-weak-language-supervision-2112.04214</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-music-audio-representations-via-weak-language-supervision-2112.04214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-music-audio-representations-via-weak-language-supervision-2112.04214"/></url>
<url><loc>https://scifaro.com/en/abs/training-robust-zero-shot-voice-conversion-models-with-self-supervised-features-2112.04424</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-robust-zero-shot-voice-conversion-models-with-self-supervised-features-2112.04424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-robust-zero-shot-voice-conversion-models-with-self-supervised-features-2112.04424"/></url>
<url><loc>https://scifaro.com/en/abs/nice-beam-neural-integrated-covariance-estimators-for-time-varying-beamformers-2112.04613</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nice-beam-neural-integrated-covariance-estimators-for-time-varying-beamformers-2112.04613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nice-beam-neural-integrated-covariance-estimators-for-time-varying-beamformers-2112.04613"/></url>
<url><loc>https://scifaro.com/en/abs/cws-presunet-music-source-separation-with-channel-wise-subband-phase-aware-resunet-2112.04685</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cws-presunet-music-source-separation-with-channel-wise-subband-phase-aware-resunet-2112.04685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cws-presunet-music-source-separation-with-channel-wise-subband-phase-aware-resunet-2112.04685"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-blind-reverberation-time-estimation-using-noise-aware-time-frequency-masking-2112.04726</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-blind-reverberation-time-estimation-using-noise-aware-time-frequency-masking-2112.04726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-blind-reverberation-time-estimation-using-noise-aware-time-frequency-masking-2112.04726"/></url>
<url><loc>https://scifaro.com/en/abs/lipsound2-self-supervised-pre-training-for-lip-to-speech-reconstruction-and-lip-reading-2112.04748</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipsound2-self-supervised-pre-training-for-lip-to-speech-reconstruction-and-lip-reading-2112.04748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipsound2-self-supervised-pre-training-for-lip-to-speech-reconstruction-and-lip-reading-2112.04748"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-musically-induced-emotions-of-not-so-popular-colombian-music-2112.04975</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-musically-induced-emotions-of-not-so-popular-colombian-music-2112.04975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-musically-induced-emotions-of-not-so-popular-colombian-music-2112.04975"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-and-autoencoder-based-unsupervised-speech-enhancement-2112.05036</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-and-autoencoder-based-unsupervised-speech-enhancement-2112.05036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-and-autoencoder-based-unsupervised-speech-enhancement-2112.05036"/></url>
<url><loc>https://scifaro.com/en/abs/music-demixing-with-the-slicq-transform-2112.05509</loc><lastmod>2021-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-demixing-with-the-slicq-transform-2112.05509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-demixing-with-the-slicq-transform-2112.05509"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-1d-cnn-lstm-gru-model-with-data-augmentation-for-speech-emotion-recognition-2112.05666</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-1d-cnn-lstm-gru-model-with-data-augmentation-for-speech-emotion-recognition-2112.05666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-1d-cnn-lstm-gru-model-with-data-augmentation-for-speech-emotion-recognition-2112.05666"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-neural-networks-for-on-device-directional-hearing-2112.05893</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-neural-networks-for-on-device-directional-hearing-2112.05893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-neural-networks-for-on-device-directional-hearing-2112.05893"/></url>
<url><loc>https://scifaro.com/en/abs/u-shaped-transformer-with-frequency-band-aware-attention-for-speech-enhancement-2112.06052</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-shaped-transformer-with-frequency-band-aware-attention-for-speech-enhancement-2112.06052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-shaped-transformer-with-frequency-band-aware-attention-for-speech-enhancement-2112.06052"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-loss-with-recognition-model-for-single-channel-enhancement-and-robust-asr-2112.06068</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-loss-with-recognition-model-for-single-channel-enhancement-and-robust-asr-2112.06068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-loss-with-recognition-model-for-single-channel-enhancement-and-robust-asr-2112.06068"/></url>
<url><loc>https://scifaro.com/en/abs/visualising-and-explaining-deep-learning-models-for-speech-quality-prediction-2112.06219</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualising-and-explaining-deep-learning-models-for-speech-quality-prediction-2112.06219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualising-and-explaining-deep-learning-models-for-speech-quality-prediction-2112.06219"/></url>
<url><loc>https://scifaro.com/en/abs/pm-mmut-boosted-phone-mask-data-augmentation-using-multi-modeling-unit-training-for-phonetic-reduction-robust-e2e-speech-recognition-2112.06721</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pm-mmut-boosted-phone-mask-data-augmentation-using-multi-modeling-unit-training-for-phonetic-reduction-robust-e2e-speech-recognition-2112.06721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pm-mmut-boosted-phone-mask-data-augmentation-using-multi-modeling-unit-training-for-phonetic-reduction-robust-e2e-speech-recognition-2112.06721"/></url>
<url><loc>https://scifaro.com/en/abs/computational-bioacoustics-with-deep-learning-a-review-and-roadmap-2112.06725</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computational-bioacoustics-with-deep-learning-a-review-and-roadmap-2112.06725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computational-bioacoustics-with-deep-learning-a-review-and-roadmap-2112.06725"/></url>
<url><loc>https://scifaro.com/en/abs/mean-square-error-based-secondary-source-placement-in-sound-field-synthesis-with-prior-information-on-desired-field-2112.06774</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mean-square-error-based-secondary-source-placement-in-sound-field-synthesis-with-prior-information-on-desired-field-2112.06774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mean-square-error-based-secondary-source-placement-in-sound-field-synthesis-with-prior-information-on-desired-field-2112.06774"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-neural-voice-camouflage-2112.07076</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-neural-voice-camouflage-2112.07076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-neural-voice-camouflage-2112.07076"/></url>
<url><loc>https://scifaro.com/en/abs/explore-long-range-context-feature-for-speaker-verification-2112.07134</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explore-long-range-context-feature-for-speaker-verification-2112.07134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explore-long-range-context-feature-for-speaker-verification-2112.07134"/></url>
<url><loc>https://scifaro.com/en/abs/embedding-based-music-emotion-recognition-using-composite-loss-2112.07192</loc><lastmod>2023-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedding-based-music-emotion-recognition-using-composite-loss-2112.07192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedding-based-music-emotion-recognition-using-composite-loss-2112.07192"/></url>
<url><loc>https://scifaro.com/en/abs/noise-reduction-and-driving-event-extraction-method-for-performance-improvement-on-driving-noise-based-surface-anomaly-detection-2112.07214</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-reduction-and-driving-event-extraction-method-for-performance-improvement-on-driving-noise-based-surface-anomaly-detection-2112.07214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-reduction-and-driving-event-extraction-method-for-performance-improvement-on-driving-noise-based-surface-anomaly-detection-2112.07214"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-covid-19-disease-diagnosis-using-1d-convolutional-neural-network-and-augmentation-with-human-respiratory-sound-based-on-parameters-cough-breath-and-voice-2112.07285</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-covid-19-disease-diagnosis-using-1d-convolutional-neural-network-and-augmentation-with-human-respiratory-sound-based-on-parameters-cough-breath-and-voice-2112.07285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-covid-19-disease-diagnosis-using-1d-convolutional-neural-network-and-augmentation-with-human-respiratory-sound-based-on-parameters-cough-breath-and-voice-2112.07285"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-learning-for-multi-zone-sound-field-reproduction-under-harsh-environmental-conditions-2112.07349</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-learning-for-multi-zone-sound-field-reproduction-under-harsh-environmental-conditions-2112.07349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-learning-for-multi-zone-sound-field-reproduction-under-harsh-environmental-conditions-2112.07349"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-diarization-with-transformer-2112.07463</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-with-transformer-2112.07463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-with-transformer-2112.07463"/></url>
<url><loc>https://scifaro.com/en/abs/a-literature-review-on-covid-19-disease-diagnosis-from-respiratory-sound-data-2112.07670</loc><lastmod>2021-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-literature-review-on-covid-19-disease-diagnosis-from-respiratory-sound-data-2112.07670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-literature-review-on-covid-19-disease-diagnosis-from-respiratory-sound-data-2112.07670"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-source-separation-through-query-based-learning-from-weakly-labeled-data-2112.07891</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-source-separation-through-query-based-learning-from-weakly-labeled-data-2112.07891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-source-separation-through-query-based-learning-from-weakly-labeled-data-2112.07891"/></url>
<url><loc>https://scifaro.com/en/abs/the-exploitation-of-multiple-feature-extraction-techniques-for-speaker-identification-in-emotional-states-under-disguised-voices-2112.07940</loc><lastmod>2021-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-exploitation-of-multiple-feature-extraction-techniques-for-speaker-identification-in-emotional-states-under-disguised-voices-2112.07940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-exploitation-of-multiple-feature-extraction-techniques-for-speaker-identification-in-emotional-states-under-disguised-voices-2112.07940"/></url>
<url><loc>https://scifaro.com/en/abs/speech-frame-implementation-for-speech-analysis-and-recognition-2112.08027</loc><lastmod>2021-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-frame-implementation-for-speech-analysis-and-recognition-2112.08027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-frame-implementation-for-speech-analysis-and-recognition-2112.08027"/></url>
<url><loc>https://scifaro.com/en/abs/emotionbox-a-music-element-driven-emotional-music-generation-system-using-recurrent-neural-network-2112.08561</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotionbox-a-music-element-driven-emotional-music-generation-system-using-recurrent-neural-network-2112.08561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotionbox-a-music-element-driven-emotional-music-generation-system-using-recurrent-neural-network-2112.08561"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-leveraging-alternative-soft-targets-from-non-parallel-qualified-speech-data-2112.08878</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-leveraging-alternative-soft-targets-from-non-parallel-qualified-speech-data-2112.08878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-leveraging-alternative-soft-targets-from-non-parallel-qualified-speech-data-2112.08878"/></url>
<url><loc>https://scifaro.com/en/abs/connecting-the-dots-between-audio-and-text-without-parallel-data-through-visual-knowledge-transfer-2112.08995</loc><lastmod>2022-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/connecting-the-dots-between-audio-and-text-without-parallel-data-through-visual-knowledge-transfer-2112.08995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/connecting-the-dots-between-audio-and-text-without-parallel-data-through-visual-knowledge-transfer-2112.08995"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-real-time-audio-visual-speech-enhancement-2112.09060</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-real-time-audio-visual-speech-enhancement-2112.09060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-real-time-audio-visual-speech-enhancement-2112.09060"/></url>
<url><loc>https://scifaro.com/en/abs/midi-ddsp-detailed-control-of-musical-performance-via-hierarchical-modeling-2112.09312</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-ddsp-detailed-control-of-musical-performance-via-hierarchical-modeling-2112.09312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-ddsp-detailed-control-of-musical-performance-via-hierarchical-modeling-2112.09312"/></url>
<url><loc>https://scifaro.com/en/abs/jtubespeech-corpus-of-japanese-speech-collected-from-youtube-for-speech-recognition-and-speaker-verification-2112.09323</loc><lastmod>2021-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jtubespeech-corpus-of-japanese-speech-collected-from-youtube-for-speech-recognition-and-speaker-verification-2112.09323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jtubespeech-corpus-of-japanese-speech-collected-from-youtube-for-speech-recognition-and-speaker-verification-2112.09323"/></url>
<url><loc>https://scifaro.com/en/abs/discretization-and-re-synthesis-an-alternative-method-to-solve-the-cocktail-party-problem-2112.09382</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discretization-and-re-synthesis-an-alternative-method-to-solve-the-cocktail-party-problem-2112.09382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discretization-and-re-synthesis-an-alternative-method-to-solve-the-cocktail-party-problem-2112.09382"/></url>
<url><loc>https://scifaro.com/en/abs/linguistic-and-gender-variation-in-speech-emotion-recognition-using-spectral-features-2112.09596</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linguistic-and-gender-variation-in-speech-emotion-recognition-using-spectral-features-2112.09596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linguistic-and-gender-variation-in-speech-emotion-recognition-using-spectral-features-2112.09596"/></url>
<url><loc>https://scifaro.com/en/abs/soundify-matching-sound-effects-to-video-2112.09726</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundify-matching-sound-effects-to-video-2112.09726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundify-matching-sound-effects-to-video-2112.09726"/></url>
<url><loc>https://scifaro.com/en/abs/detect-what-you-want-target-sound-detection-2112.10153</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detect-what-you-want-target-sound-detection-2112.10153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detect-what-you-want-target-sound-detection-2112.10153"/></url>
<url><loc>https://scifaro.com/en/abs/generating-chord-progression-from-melody-with-flexible-harmonic-rhythm-and-controllable-harmonic-density-2112.11122</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-chord-progression-from-melody-with-flexible-harmonic-rhythm-and-controllable-harmonic-density-2112.11122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-chord-progression-from-melody-with-flexible-harmonic-rhythm-and-controllable-harmonic-density-2112.11122"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-complex-cycle-consistent-2112.11142</loc><lastmod>2021-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-complex-cycle-consistent-2112.11142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-complex-cycle-consistent-2112.11142"/></url>
<url><loc>https://scifaro.com/en/abs/safeguarding-test-signals-for-acoustic-measurement-using-arbitrary-sounds-2112.11373</loc><lastmod>2021-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/safeguarding-test-signals-for-acoustic-measurement-using-arbitrary-sounds-2112.11373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/safeguarding-test-signals-for-acoustic-measurement-using-arbitrary-sounds-2112.11373"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-multi-task-pre-training-2112.11459</loc><lastmod>2022-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-multi-task-pre-training-2112.11459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-based-monaural-speech-enhancement-with-multi-task-pre-training-2112.11459"/></url>
<url><loc>https://scifaro.com/en/abs/graph-attentive-feature-aggregation-for-text-independent-speaker-verification-2112.12343</loc><lastmod>2021-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-attentive-feature-aggregation-for-text-independent-speaker-verification-2112.12343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-attentive-feature-aggregation-for-text-independent-speaker-verification-2112.12343"/></url>
<url><loc>https://scifaro.com/en/abs/multi-variant-consistency-based-self-supervised-learning-for-robust-automatic-speech-recognition-2112.12522</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-variant-consistency-based-self-supervised-learning-for-robust-automatic-speech-recognition-2112.12522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-variant-consistency-based-self-supervised-learning-for-robust-automatic-speech-recognition-2112.12522"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-real-time-on-chip-audio-super-resolution-for-bone-conduction-microphones-2112.13156</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-real-time-on-chip-audio-super-resolution-for-bone-conduction-microphones-2112.13156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-real-time-on-chip-audio-super-resolution-for-bone-conduction-microphones-2112.13156"/></url>
<url><loc>https://scifaro.com/en/abs/novel-dual-channel-long-short-term-memory-compressed-capsule-networks-for-emotion-recognition-2112.13350</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-dual-channel-long-short-term-memory-compressed-capsule-networks-for-emotion-recognition-2112.13350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-dual-channel-long-short-term-memory-compressed-capsule-networks-for-emotion-recognition-2112.13350"/></url>
<url><loc>https://scifaro.com/en/abs/novel-hybrid-dnn-approaches-for-speaker-verification-in-emotional-and-stressful-talking-environments-2112.13353</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-hybrid-dnn-approaches-for-speaker-verification-in-emotional-and-stressful-talking-environments-2112.13353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-hybrid-dnn-approaches-for-speaker-verification-in-emotional-and-stressful-talking-environments-2112.13353"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-auditory-datasets-2112.13450</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-auditory-datasets-2112.13450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-auditory-datasets-2112.13450"/></url>
<url><loc>https://scifaro.com/en/abs/retrieving-effective-acoustic-impedance-and-refractive-index-for-size-mismatch-samples-2112.13453</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieving-effective-acoustic-impedance-and-refractive-index-for-size-mismatch-samples-2112.13453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieving-effective-acoustic-impedance-and-refractive-index-for-size-mismatch-samples-2112.13453"/></url>
<url><loc>https://scifaro.com/en/abs/bilingual-speech-recognition-by-estimating-speaker-geometry-from-video-data-2112.13463</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bilingual-speech-recognition-by-estimating-speaker-geometry-from-video-data-2112.13463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bilingual-speech-recognition-by-estimating-speaker-geometry-from-video-data-2112.13463"/></url>
<url><loc>https://scifaro.com/en/abs/feature-extraction-with-mel-scale-separation-method-on-noise-audio-recordings-2112.14930</loc><lastmod>2022-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-extraction-with-mel-scale-separation-method-on-noise-audio-recordings-2112.14930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-extraction-with-mel-scale-separation-method-on-noise-audio-recordings-2112.14930"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-symbolic-arrangement-via-cross-modal-music-representation-learning-2112.15110</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-symbolic-arrangement-via-cross-modal-music-representation-learning-2112.15110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-symbolic-arrangement-via-cross-modal-music-representation-learning-2112.15110"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-deep-music-generation-methods-using-data-augmentation-2201.00052</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-deep-music-generation-methods-using-data-augmentation-2201.00052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-deep-music-generation-methods-using-data-augmentation-2201.00052"/></url>
<url><loc>https://scifaro.com/en/abs/bird-species-classification-and-acoustic-features-selection-based-on-distributed-neural-network-with-two-stage-windowing-of-short-term-features-2201.00124</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bird-species-classification-and-acoustic-features-selection-based-on-distributed-neural-network-with-two-stage-windowing-of-short-term-features-2201.00124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bird-species-classification-and-acoustic-features-selection-based-on-distributed-neural-network-with-two-stage-windowing-of-short-term-features-2201.00124"/></url>
<url><loc>https://scifaro.com/en/abs/generating-adversarial-samples-for-training-wake-up-word-detection-systems-against-confusing-words-2201.00167</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-adversarial-samples-for-training-wake-up-word-detection-systems-against-confusing-words-2201.00167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-adversarial-samples-for-training-wake-up-word-detection-systems-against-confusing-words-2201.00167"/></url>
<url><loc>https://scifaro.com/en/abs/classifying-autism-from-crowdsourced-semi-structured-speech-recordings-a-machine-learning-approach-2201.00927</loc><lastmod>2022-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classifying-autism-from-crowdsourced-semi-structured-speech-recordings-a-machine-learning-approach-2201.00927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classifying-autism-from-crowdsourced-semi-structured-speech-recordings-a-machine-learning-approach-2201.00927"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-longitudinal-cough-breath-and-voice-data-for-covid-19-progression-prediction-via-sequential-deep-learning-model-development-and-validation-2201.01232</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-longitudinal-cough-breath-and-voice-data-for-covid-19-progression-prediction-via-sequential-deep-learning-model-development-and-validation-2201.01232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-longitudinal-cough-breath-and-voice-data-for-covid-19-progression-prediction-via-sequential-deep-learning-model-development-and-validation-2201.01232"/></url>
<url><loc>https://scifaro.com/en/abs/robust-self-supervised-audio-visual-speech-recognition-2201.01763</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-self-supervised-audio-visual-speech-recognition-2201.01763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-self-supervised-audio-visual-speech-recognition-2201.01763"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-beat-tracking-in-musical-signals-with-polyphonic-contrastive-learning-2201.01771</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-beat-tracking-in-musical-signals-with-polyphonic-contrastive-learning-2201.01771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-beat-tracking-in-musical-signals-with-polyphonic-contrastive-learning-2201.01771"/></url>
<url><loc>https://scifaro.com/en/abs/implementing-simple-spectral-denoising-for-environmental-audio-recordings-2201.02099</loc><lastmod>2022-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementing-simple-spectral-denoising-for-environmental-audio-recordings-2201.02099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementing-simple-spectral-denoising-for-environmental-audio-recordings-2201.02099"/></url>
<url><loc>https://scifaro.com/en/abs/a-sinusoidal-signal-reconstruction-method-for-the-inversion-of-the-mel-spectrogram-2201.02483</loc><lastmod>2022-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sinusoidal-signal-reconstruction-method-for-the-inversion-of-the-mel-spectrogram-2201.02483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sinusoidal-signal-reconstruction-method-for-the-inversion-of-the-mel-spectrogram-2201.02483"/></url>
<url><loc>https://scifaro.com/en/abs/audio-representations-for-deep-learning-in-sound-synthesis-a-review-2201.02490</loc><lastmod>2022-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-representations-for-deep-learning-in-sound-synthesis-a-review-2201.02490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-representations-for-deep-learning-in-sound-synthesis-a-review-2201.02490"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-audio-representation-using-space-filling-curves-2201.02805</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-audio-representation-using-space-filling-curves-2201.02805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-audio-representation-using-space-filling-curves-2201.02805"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-speaker-identification-using-a-novel-capsule-nets-model-2201.02994</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-speaker-identification-using-a-novel-capsule-nets-model-2201.02994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-speaker-identification-using-a-novel-capsule-nets-model-2201.02994"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-of-deep-learning-frameworks-applied-for-predicting-respiratory-anomalies-2201.03054</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-of-deep-learning-frameworks-applied-for-predicting-respiratory-anomalies-2201.03054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-of-deep-learning-frameworks-applied-for-predicting-respiratory-anomalies-2201.03054"/></url>
<url><loc>https://scifaro.com/en/abs/local-information-assisted-attention-free-decoder-for-audio-captioning-2201.03217</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-information-assisted-attention-free-decoder-for-audio-captioning-2201.03217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-information-assisted-attention-free-decoder-for-audio-captioning-2201.03217"/></url>
<url><loc>https://scifaro.com/en/abs/sub-mw-keyword-spotting-on-an-mcu-analog-binary-feature-extraction-and-binary-neural-networks-2201.03386</loc><lastmod>2022-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-mw-keyword-spotting-on-an-mcu-analog-binary-feature-extraction-and-binary-neural-networks-2201.03386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-mw-keyword-spotting-on-an-mcu-analog-binary-feature-extraction-and-binary-neural-networks-2201.03386"/></url>
<url><loc>https://scifaro.com/en/abs/music2video-automatic-generation-of-music-video-with-fusion-of-audio-and-text-2201.03809</loc><lastmod>2022-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music2video-automatic-generation-of-music-video-with-fusion-of-audio-and-text-2201.03809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music2video-automatic-generation-of-music-video-with-fusion-of-audio-and-text-2201.03809"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-intensity-and-its-control-for-emotional-voice-conversion-2201.03967</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-intensity-and-its-control-for-emotional-voice-conversion-2201.03967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-intensity-and-its-control-for-emotional-voice-conversion-2201.03967"/></url>
<url><loc>https://scifaro.com/en/abs/sound-dr-reliable-sound-dataset-and-baseline-artificial-intelligence-system-for-respiratory-illnesses-2201.04581</loc><lastmod>2023-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-dr-reliable-sound-dataset-and-baseline-artificial-intelligence-system-for-respiratory-illnesses-2201.04581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-dr-reliable-sound-dataset-and-baseline-artificial-intelligence-system-for-respiratory-illnesses-2201.04581"/></url>
<url><loc>https://scifaro.com/en/abs/voxsrc-2021-the-third-voxceleb-speaker-recognition-challenge-2201.04583</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxsrc-2021-the-third-voxceleb-speaker-recognition-challenge-2201.04583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxsrc-2021-the-third-voxceleb-speaker-recognition-challenge-2201.04583"/></url>
<url><loc>https://scifaro.com/en/abs/the-effectiveness-of-time-stretching-for-enhancing-dysarthric-speech-for-improved-dysarthric-speech-recognition-2201.04908</loc><lastmod>2022-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effectiveness-of-time-stretching-for-enhancing-dysarthric-speech-for-improved-dysarthric-speech-recognition-2201.04908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effectiveness-of-time-stretching-for-enhancing-dysarthric-speech-for-improved-dysarthric-speech-recognition-2201.04908"/></url>
<url><loc>https://scifaro.com/en/abs/fish-sounds-towards-the-evaluation-of-marine-acoustic-biodiversity-through-data-driven-audio-source-separation-2201.05013</loc><lastmod>2022-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fish-sounds-towards-the-evaluation-of-marine-acoustic-biodiversity-through-data-driven-audio-source-separation-2201.05013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fish-sounds-towards-the-evaluation-of-marine-acoustic-biodiversity-through-data-driven-audio-source-separation-2201.05013"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-chord-vocabularies-exploiting-pitch-relationships-in-a-chord-estimation-metric-2201.05244</loc><lastmod>2022-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-chord-vocabularies-exploiting-pitch-relationships-in-a-chord-estimation-metric-2201.05244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-chord-vocabularies-exploiting-pitch-relationships-in-a-chord-estimation-metric-2201.05244"/></url>
<url><loc>https://scifaro.com/en/abs/multiphonic-modeling-using-impulse-pattern-formulation-ipf-2201.05452</loc><lastmod>2022-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiphonic-modeling-using-impulse-pattern-formulation-ipf-2201.05452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiphonic-modeling-using-impulse-pattern-formulation-ipf-2201.05452"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-using-spectral-temporal-information-fusion-2201.05510</loc><lastmod>2022-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-spectral-temporal-information-fusion-2201.05510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-spectral-temporal-information-fusion-2201.05510"/></url>
<url><loc>https://scifaro.com/en/abs/spectro-temporal-deep-features-for-disordered-speech-assessment-and-recognition-2201.05554</loc><lastmod>2022-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectro-temporal-deep-features-for-disordered-speech-assessment-and-recognition-2201.05554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectro-temporal-deep-features-for-disordered-speech-assessment-and-recognition-2201.05554"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-data-augmentation-techniques-for-disordered-speech-recognition-2201.05562</loc><lastmod>2022-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-data-augmentation-techniques-for-disordered-speech-recognition-2201.05562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-data-augmentation-techniques-for-disordered-speech-recognition-2201.05562"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-multi-task-learning-method-for-symbolic-music-emotion-recognition-2201.05782</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-multi-task-learning-method-for-symbolic-music-emotion-recognition-2201.05782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-multi-task-learning-method-for-symbolic-music-emotion-recognition-2201.05782"/></url>
<url><loc>https://scifaro.com/en/abs/convmixer-feature-interactive-convolution-with-curriculum-learning-for-small-footprint-and-noisy-far-field-keyword-spotting-2201.05863</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convmixer-feature-interactive-convolution-with-curriculum-learning-for-small-footprint-and-noisy-far-field-keyword-spotting-2201.05863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convmixer-feature-interactive-convolution-with-curriculum-learning-for-small-footprint-and-noisy-far-field-keyword-spotting-2201.05863"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-the-repetition-based-recovering-of-acoustic-and-visual-sources-with-dendritic-neurons-2201.06123</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-the-repetition-based-recovering-of-acoustic-and-visual-sources-with-dendritic-neurons-2201.06123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-the-repetition-based-recovering-of-acoustic-and-visual-sources-with-dendritic-neurons-2201.06123"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-study-of-acoustic-echo-cancellation-algorithms-for-speech-recognition-system-in-noisy-environment-2201.06209</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-study-of-acoustic-echo-cancellation-algorithms-for-speech-recognition-system-in-noisy-environment-2201.06209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-study-of-acoustic-echo-cancellation-algorithms-for-speech-recognition-system-in-noisy-environment-2201.06209"/></url>
<url><loc>https://scifaro.com/en/abs/on-training-targets-and-activation-functions-for-deep-representation-learning-in-text-dependent-speaker-verification-2201.06426</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-training-targets-and-activation-functions-for-deep-representation-learning-in-text-dependent-speaker-verification-2201.06426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-training-targets-and-activation-functions-for-deep-representation-learning-in-text-dependent-speaker-verification-2201.06426"/></url>
<url><loc>https://scifaro.com/en/abs/msemotts-multi-scale-emotion-transfer-prediction-and-control-for-emotional-speech-synthesis-2201.06460</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msemotts-multi-scale-emotion-transfer-prediction-and-control-for-emotional-speech-synthesis-2201.06460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msemotts-multi-scale-emotion-transfer-prediction-and-control-for-emotional-speech-synthesis-2201.06460"/></url>
<url><loc>https://scifaro.com/en/abs/opencpop-a-high-quality-open-source-chinese-popular-song-corpus-for-singing-voice-synthesis-2201.07429</loc><lastmod>2022-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/opencpop-a-high-quality-open-source-chinese-popular-song-corpus-for-singing-voice-synthesis-2201.07429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/opencpop-a-high-quality-open-source-chinese-popular-song-corpus-for-singing-voice-synthesis-2201.07429"/></url>
<url><loc>https://scifaro.com/en/abs/mhtts-fast-multi-head-text-to-speech-for-spontaneous-speech-with-imperfect-transcription-2201.07438</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mhtts-fast-multi-head-text-to-speech-for-spontaneous-speech-with-imperfect-transcription-2201.07438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mhtts-fast-multi-head-text-to-speech-for-spontaneous-speech-with-imperfect-transcription-2201.07438"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-personalization-of-an-emotion-recognition-system-the-unique-properties-of-the-externalization-of-valence-in-speech-2201.07876</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-personalization-of-an-emotion-recognition-system-the-unique-properties-of-the-externalization-of-valence-in-speech-2201.07876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-personalization-of-an-emotion-recognition-system-the-unique-properties-of-the-externalization-of-valence-in-speech-2201.07876"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-text-to-speech-using-multi-task-learning-and-speaker-classifier-joint-training-2201.08124</loc><lastmod>2022-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-text-to-speech-using-multi-task-learning-and-speaker-classifier-joint-training-2201.08124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-text-to-speech-using-multi-task-learning-and-speaker-classifier-joint-training-2201.08124"/></url>
<url><loc>https://scifaro.com/en/abs/kinit-classification-in-ethiopian-chants-azmaris-and-modern-music-a-new-dataset-and-cnn-benchmark-2201.08448</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kinit-classification-in-ethiopian-chants-azmaris-and-modern-music-a-new-dataset-and-cnn-benchmark-2201.08448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kinit-classification-in-ethiopian-chants-azmaris-and-modern-music-a-new-dataset-and-cnn-benchmark-2201.08448"/></url>
<url><loc>https://scifaro.com/en/abs/can-machines-generate-personalized-music-a-hybrid-favorite-aware-method-for-user-preference-music-transfer-2201.08526</loc><lastmod>2022-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-machines-generate-personalized-music-a-hybrid-favorite-aware-method-for-user-preference-music-transfer-2201.08526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-machines-generate-personalized-music-a-hybrid-favorite-aware-method-for-user-preference-music-transfer-2201.08526"/></url>
<url><loc>https://scifaro.com/en/abs/nas-vad-neural-architecture-search-for-voice-activity-detection-2201.09032</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nas-vad-neural-architecture-search-for-voice-activity-detection-2201.09032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nas-vad-neural-architecture-search-for-voice-activity-detection-2201.09032"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-auditory-acoustic-features-for-the-diagnosis-of-the-covid-19-2201.09110</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-auditory-acoustic-features-for-the-diagnosis-of-the-covid-19-2201.09110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-auditory-acoustic-features-for-the-diagnosis-of-the-covid-19-2201.09110"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-neural-speech-coding-for-real-time-communications-2201.09429</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-neural-speech-coding-for-real-time-communications-2201.09429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-neural-speech-coding-for-real-time-communications-2201.09429"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-style-and-speaker-attributes-for-tts-style-transfer-2201.09472</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-style-and-speaker-attributes-for-tts-style-transfer-2201.09472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-style-and-speaker-attributes-for-tts-style-transfer-2201.09472"/></url>
<url><loc>https://scifaro.com/en/abs/bias-in-automated-speaker-recognition-2201.09486</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bias-in-automated-speaker-recognition-2201.09486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bias-in-automated-speaker-recognition-2201.09486"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-music-source-separation-using-differentiable-parametric-source-models-2201.09592</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-music-source-separation-using-differentiable-parametric-source-models-2201.09592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-music-source-separation-using-differentiable-parametric-source-models-2201.09592"/></url>
<url><loc>https://scifaro.com/en/abs/improving-factored-hybrid-hmm-acoustic-modeling-without-state-tying-2201.09692</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-factored-hybrid-hmm-acoustic-modeling-without-state-tying-2201.09692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-factored-hybrid-hmm-acoustic-modeling-without-state-tying-2201.09692"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-tandem-speaker-verification-and-anti-spoofing-systems-2201.09709</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-tandem-speaker-verification-and-anti-spoofing-systems-2201.09709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-tandem-speaker-verification-and-anti-spoofing-systems-2201.09709"/></url>
<url><loc>https://scifaro.com/en/abs/improving-adversarial-waveform-generation-based-singing-voice-conversion-with-harmonic-signals-2201.10130</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-adversarial-waveform-generation-based-singing-voice-conversion-with-harmonic-signals-2201.10130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-adversarial-waveform-generation-based-singing-voice-conversion-with-harmonic-signals-2201.10130"/></url>
<url><loc>https://scifaro.com/en/abs/improved-mispronunciation-detection-system-using-a-hybrid-ctc-att-based-approach-for-l2-english-speakers-2201.10198</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-mispronunciation-detection-system-using-a-hybrid-ctc-att-based-approach-for-l2-english-speakers-2201.10198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-mispronunciation-detection-system-using-a-hybrid-ctc-att-based-approach-for-l2-english-speakers-2201.10198"/></url>
<url><loc>https://scifaro.com/en/abs/sasv-challenge-2022-a-spoofing-aware-speaker-verification-challenge-evaluation-plan-2201.10283</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sasv-challenge-2022-a-spoofing-aware-speaker-verification-challenge-evaluation-plan-2201.10283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sasv-challenge-2022-a-spoofing-aware-speaker-verification-challenge-evaluation-plan-2201.10283"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-hybrid-models-of-tensor-train-networks-for-spoken-command-recognition-2201.10609</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-hybrid-models-of-tensor-train-networks-for-spoken-command-recognition-2201.10609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-hybrid-models-of-tensor-train-networks-for-spoken-command-recognition-2201.10609"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-voice-conversion-with-domain-adversarial-training-2201.10693</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-voice-conversion-with-domain-adversarial-training-2201.10693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-voice-conversion-with-domain-adversarial-training-2201.10693"/></url>
<url><loc>https://scifaro.com/en/abs/j-mac-japanese-multi-speaker-audiobook-corpus-for-speech-synthesis-2201.10896</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/j-mac-japanese-multi-speaker-audiobook-corpus-for-speech-synthesis-2201.10896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/j-mac-japanese-multi-speaker-audiobook-corpus-for-speech-synthesis-2201.10896"/></url>
<url><loc>https://scifaro.com/en/abs/figaro-generating-symbolic-music-with-fine-grained-artistic-control-2201.10936</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/figaro-generating-symbolic-music-with-fine-grained-artistic-control-2201.10936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/figaro-generating-symbolic-music-with-fine-grained-artistic-control-2201.10936"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-wavelet-packet-transform-for-data-adapted-spectrograms-2201.11069</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-wavelet-packet-transform-for-data-adapted-spectrograms-2201.11069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-wavelet-packet-transform-for-data-adapted-spectrograms-2201.11069"/></url>
<url><loc>https://scifaro.com/en/abs/rapid-solution-for-searching-similar-audio-items-2201.11178</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rapid-solution-for-searching-similar-audio-items-2201.11178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rapid-solution-for-searching-similar-audio-items-2201.11178"/></url>
<url><loc>https://scifaro.com/en/abs/discovering-phonetic-inventories-with-crosslingual-automatic-speech-recognition-2201.11207</loc><lastmod>2022-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discovering-phonetic-inventories-with-crosslingual-automatic-speech-recognition-2201.11207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discovering-phonetic-inventories-with-crosslingual-automatic-speech-recognition-2201.11207"/></url>
<url><loc>https://scifaro.com/en/abs/the-msxf-tts-system-for-icassp-2022-add-challenge-2201.11400</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-msxf-tts-system-for-icassp-2022-add-challenge-2201.11400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-msxf-tts-system-for-icassp-2022-add-challenge-2201.11400"/></url>
<url><loc>https://scifaro.com/en/abs/dual-learning-music-composition-and-dance-choreography-2201.11999</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-learning-music-composition-and-dance-choreography-2201.11999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-learning-music-composition-and-dance-choreography-2201.11999"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-audio-captioning-using-attention-weighted-event-based-embeddings-2201.12352</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-audio-captioning-using-attention-weighted-event-based-embeddings-2201.12352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-audio-captioning-using-attention-weighted-event-based-embeddings-2201.12352"/></url>
<url><loc>https://scifaro.com/en/abs/it-owave-it-o-stochastic-differential-equation-is-all-you-need-for-wave-generation-2201.12519</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/it-owave-it-o-stochastic-differential-equation-is-all-you-need-for-wave-generation-2201.12519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/it-owave-it-o-stochastic-differential-equation-is-all-you-need-for-wave-generation-2201.12519"/></url>
<url><loc>https://scifaro.com/en/abs/the-hccl-dku-system-for-fake-audio-generation-task-of-the-2022-icassp-add-challenge-2201.12567</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hccl-dku-system-for-fake-audio-generation-task-of-the-2022-icassp-add-challenge-2201.12567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hccl-dku-system-for-fake-audio-generation-task-of-the-2022-icassp-add-challenge-2201.12567"/></url>
<url><loc>https://scifaro.com/en/abs/partitura-a-python-package-for-handling-symbolic-musical-data-2201.13144</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partitura-a-python-package-for-handling-symbolic-musical-data-2201.13144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partitura-a-python-package-for-handling-symbolic-musical-data-2201.13144"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-digital-signal-processing-mixture-model-for-synthesis-parameter-extraction-from-mixture-of-harmonic-sounds-2202.00200</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-digital-signal-processing-mixture-model-for-synthesis-parameter-extraction-from-mixture-of-harmonic-sounds-2202.00200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-digital-signal-processing-mixture-model-for-synthesis-parameter-extraction-from-mixture-of-harmonic-sounds-2202.00200"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-removing-head-movements-on-audio-visual-speech-enhancement-2202.00538</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-removing-head-movements-on-audio-visual-speech-enhancement-2202.00538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-removing-head-movements-on-audio-visual-speech-enhancement-2202.00538"/></url>
<url><loc>https://scifaro.com/en/abs/hts-at-a-hierarchical-token-semantic-audio-transformer-for-sound-classification-and-detection-2202.00874</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hts-at-a-hierarchical-token-semantic-audio-transformer-for-sound-classification-and-detection-2202.00874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hts-at-a-hierarchical-token-semantic-audio-transformer-for-sound-classification-and-detection-2202.00874"/></url>
<url><loc>https://scifaro.com/en/abs/melody-extraction-from-polyphonic-music-by-deep-learning-approaches-a-review-2202.01078</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-extraction-from-polyphonic-music-by-deep-learning-approaches-a-review-2202.01078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-extraction-from-polyphonic-music-by-deep-learning-approaches-a-review-2202.01078"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-emergency-vehicle-event-detection-using-audio-data-2202.01367</loc><lastmod>2022-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-emergency-vehicle-event-detection-using-audio-data-2202.01367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-emergency-vehicle-event-detection-using-audio-data-2202.01367"/></url>
<url><loc>https://scifaro.com/en/abs/a-psychoacoustic-quality-criterion-for-path-traced-sound-propagation-2202.01582</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-psychoacoustic-quality-criterion-for-path-traced-sound-propagation-2202.01582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-psychoacoustic-quality-criterion-for-path-traced-sound-propagation-2202.01582"/></url>
<url><loc>https://scifaro.com/en/abs/the-royalflush-system-of-speech-recognition-for-m2met-challenge-2202.01614</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-royalflush-system-of-speech-recognition-for-m2met-challenge-2202.01614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-royalflush-system-of-speech-recognition-for-m2met-challenge-2202.01614"/></url>
<url><loc>https://scifaro.com/en/abs/mfa-tdnn-with-multi-scale-frequency-channel-attention-for-text-independent-speaker-verification-with-short-utterances-2202.01624</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfa-tdnn-with-multi-scale-frequency-channel-attention-for-text-independent-speaker-verification-with-short-utterances-2202.01624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfa-tdnn-with-multi-scale-frequency-channel-attention-for-text-independent-speaker-verification-with-short-utterances-2202.01624"/></url>
<url><loc>https://scifaro.com/en/abs/improving-lyrics-alignment-through-joint-pitch-detection-2202.01646</loc><lastmod>2022-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-lyrics-alignment-through-joint-pitch-detection-2202.01646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-lyrics-alignment-through-joint-pitch-detection-2202.01646"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-anomaly-detection-2202.01784</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-anomaly-detection-2202.01784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-anomaly-detection-2202.01784"/></url>
<url><loc>https://scifaro.com/en/abs/musical-audio-similarity-with-self-supervised-convolutional-neural-networks-2202.02112</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-audio-similarity-with-self-supervised-convolutional-neural-networks-2202.02112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-audio-similarity-with-self-supervised-convolutional-neural-networks-2202.02112"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-pitch-detection-with-convolutional-recurrent-neural-networks-2202.02115</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-pitch-detection-with-convolutional-recurrent-neural-networks-2202.02115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-pitch-detection-with-convolutional-recurrent-neural-networks-2202.02115"/></url>
<url><loc>https://scifaro.com/en/abs/seed-sound-event-early-detection-via-evidential-uncertainty-2202.02441</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seed-sound-event-early-detection-via-evidential-uncertainty-2202.02441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seed-sound-event-early-detection-via-evidential-uncertainty-2202.02441"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-beam-filter-for-real-time-multi-channel-speech-enhancement-2202.02500</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-beam-filter-for-real-time-multi-channel-speech-enhancement-2202.02500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-beam-filter-for-real-time-multi-channel-speech-enhancement-2202.02500"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-a-real-time-wavelet-based-algorithm-for-improving-speech-intelligibility-2202.02545</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-a-real-time-wavelet-based-algorithm-for-improving-speech-intelligibility-2202.02545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-a-real-time-wavelet-based-algorithm-for-improving-speech-intelligibility-2202.02545"/></url>
<url><loc>https://scifaro.com/en/abs/deep-impulse-responses-estimating-and-parameterizing-filters-with-deep-networks-2202.03416</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-impulse-responses-estimating-and-parameterizing-filters-with-deep-networks-2202.03416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-impulse-responses-estimating-and-parameterizing-filters-with-deep-networks-2202.03416"/></url>
<url><loc>https://scifaro.com/en/abs/maximizing-audio-event-detection-model-performance-on-small-datasets-through-knowledge-transfer-data-augmentation-and-pretraining-an-ablation-study-2202.03514</loc><lastmod>2022-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximizing-audio-event-detection-model-performance-on-small-datasets-through-knowledge-transfer-data-augmentation-and-pretraining-an-ablation-study-2202.03514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximizing-audio-event-detection-model-performance-on-small-datasets-through-knowledge-transfer-data-augmentation-and-pretraining-an-ablation-study-2202.03514"/></url>
<url><loc>https://scifaro.com/en/abs/summary-on-the-icassp-2022-multi-channel-multi-party-meeting-transcription-grand-challenge-2202.03647</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/summary-on-the-icassp-2022-multi-channel-multi-party-meeting-transcription-grand-challenge-2202.03647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/summary-on-the-icassp-2022-multi-channel-multi-party-meeting-transcription-grand-challenge-2202.03647"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-self-supervised-features-2202.03896</loc><lastmod>2022-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-self-supervised-features-2202.03896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-self-supervised-features-2202.03896"/></url>
<url><loc>https://scifaro.com/en/abs/the-volcspeech-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04261</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-volcspeech-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-volcspeech-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04261"/></url>
<url><loc>https://scifaro.com/en/abs/cau-ku-team-s-submission-to-add-2022-challenge-task-1-low-quality-fake-audio-detection-through-frequency-feature-masking-2202.04328</loc><lastmod>2022-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cau-ku-team-s-submission-to-add-2022-challenge-task-1-low-quality-fake-audio-detection-through-frequency-feature-masking-2202.04328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cau-ku-team-s-submission-to-add-2022-challenge-task-1-low-quality-fake-audio-detection-through-frequency-feature-masking-2202.04328"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-audio-rendering-in-the-spherical-harmonic-domain-a-summary-of-the-mathematics-and-its-pitfalls-2202.04393</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-audio-rendering-in-the-spherical-harmonic-domain-a-summary-of-the-mathematics-and-its-pitfalls-2202.04393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-audio-rendering-in-the-spherical-harmonic-domain-a-summary-of-the-mathematics-and-its-pitfalls-2202.04393"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-drums-generation-using-compound-word-representations-2202.04464</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-drums-generation-using-compound-word-representations-2202.04464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-drums-generation-using-compound-word-representations-2202.04464"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-audio-visual-information-fusion-using-canonical-correlated-graph-neural-network-for-energy-efficient-speech-enhancement-2202.04528</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-audio-visual-information-fusion-using-canonical-correlated-graph-neural-network-for-energy-efficient-speech-enhancement-2202.04528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-audio-visual-information-fusion-using-canonical-correlated-graph-neural-network-for-energy-efficient-speech-enhancement-2202.04528"/></url>
<url><loc>https://scifaro.com/en/abs/shas-approaching-optimal-segmentation-for-end-to-end-speech-translation-2202.04774</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shas-approaching-optimal-segmentation-for-end-to-end-speech-translation-2202.04774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shas-approaching-optimal-segmentation-for-end-to-end-speech-translation-2202.04774"/></url>
<url><loc>https://scifaro.com/en/abs/royalflush-speaker-diarization-system-for-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04814</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/royalflush-speaker-diarization-system-for-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/royalflush-speaker-diarization-system-for-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.04814"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-model-based-phase-aware-bayesian-spectral-amplitude-estimator-for-single-channel-speech-enhancement-2202.04882</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-model-based-phase-aware-bayesian-spectral-amplitude-estimator-for-single-channel-speech-enhancement-2202.04882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-model-based-phase-aware-bayesian-spectral-amplitude-estimator-for-single-channel-speech-enhancement-2202.04882"/></url>
<url><loc>https://scifaro.com/en/abs/sound-masking-degrades-perception-of-self-location-during-stepping-a-case-for-sound-transparent-spacesuits-for-mars-2202.04958</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-masking-degrades-perception-of-self-location-during-stepping-a-case-for-sound-transparent-spacesuits-for-mars-2202.04958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-masking-degrades-perception-of-self-location-during-stepping-a-case-for-sound-transparent-spacesuits-for-mars-2202.04958"/></url>
<url><loc>https://scifaro.com/en/abs/barwise-compression-schemes-for-audio-based-music-structure-analysis-2202.04981</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/barwise-compression-schemes-for-audio-based-music-structure-analysis-2202.04981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/barwise-compression-schemes-for-audio-based-music-structure-analysis-2202.04981"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-convolutive-nmf-for-automatic-piano-transcription-2202.04989</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-convolutive-nmf-for-automatic-piano-transcription-2202.04989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-convolutive-nmf-for-automatic-piano-transcription-2202.04989"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-nonlinear-compression-for-robust-speaker-verification-2202.05236</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-nonlinear-compression-for-robust-speaker-verification-2202.05236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-nonlinear-compression-for-robust-speaker-verification-2202.05236"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-by-using-psychoacoustical-model-inspired-fusion-framework-2202.05272</loc><lastmod>2025-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-by-using-psychoacoustical-model-inspired-fusion-framework-2202.05272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-by-using-psychoacoustical-model-inspired-fusion-framework-2202.05272"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-description-of-capabilities-and-constraints-for-a-computational-auditory-system-an-artificial-ear-for-cognitive-architectures-2202.05332</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-description-of-capabilities-and-constraints-for-a-computational-auditory-system-an-artificial-ear-for-cognitive-architectures-2202.05332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-description-of-capabilities-and-constraints-for-a-computational-auditory-system-an-artificial-ear-for-cognitive-architectures-2202.05332"/></url>
<url><loc>https://scifaro.com/en/abs/faag-fast-adversarial-audio-generation-through-interactive-attack-optimisation-2202.05416</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/faag-fast-adversarial-audio-generation-through-interactive-attack-optimisation-2202.05416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/faag-fast-adversarial-audio-generation-through-interactive-attack-optimisation-2202.05416"/></url>
<url><loc>https://scifaro.com/en/abs/a-sonification-of-the-zcosmos-galaxy-dataset-2202.05539</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sonification-of-the-zcosmos-galaxy-dataset-2202.05539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sonification-of-the-zcosmos-galaxy-dataset-2202.05539"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-deep-learning-frameworks-for-detecting-covid-19-2202.05626</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-deep-learning-frameworks-for-detecting-covid-19-2202.05626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-deep-learning-frameworks-for-detecting-covid-19-2202.05626"/></url>
<url><loc>https://scifaro.com/en/abs/audio-defect-detection-in-music-with-deep-networks-2202.05718</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-defect-detection-in-music-with-deep-networks-2202.05718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-defect-detection-in-music-with-deep-networks-2202.05718"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-speech-intelligibility-enhancement-model-based-on-canonicalcorrelation-and-deep-learning-2202.05756</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-speech-intelligibility-enhancement-model-based-on-canonicalcorrelation-and-deep-learning-2202.05756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-speech-intelligibility-enhancement-model-based-on-canonicalcorrelation-and-deep-learning-2202.05756"/></url>
<url><loc>https://scifaro.com/en/abs/the-hamse-ontology-using-semantic-technologies-to-support-music-representation-interoperability-and-musicological-analysis-2202.05817</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hamse-ontology-using-semantic-technologies-to-support-music-representation-interoperability-and-musicological-analysis-2202.05817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hamse-ontology-using-semantic-technologies-to-support-music-representation-interoperability-and-musicological-analysis-2202.05817"/></url>
<url><loc>https://scifaro.com/en/abs/wav2vec2-0-on-the-edge-performance-evaluation-2202.05993</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2vec2-0-on-the-edge-performance-evaluation-2202.05993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2vec2-0-on-the-edge-performance-evaluation-2202.05993"/></url>
<url><loc>https://scifaro.com/en/abs/deep-performer-score-to-audio-music-performance-synthesis-2202.06034</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-performer-score-to-audio-music-performance-synthesis-2202.06034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-performer-score-to-audio-music-performance-synthesis-2202.06034"/></url>
<url><loc>https://scifaro.com/en/abs/learning-long-term-music-representations-via-hierarchical-contextual-constraints-2202.06180</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-long-term-music-representations-via-hierarchical-contextual-constraints-2202.06180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-long-term-music-representations-via-hierarchical-contextual-constraints-2202.06180"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-deep-residual-echo-suppression-with-echo-aware-loss-2202.06850</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-deep-residual-echo-suppression-with-echo-aware-loss-2202.06850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-deep-residual-echo-suppression-with-echo-aware-loss-2202.06850"/></url>
<url><loc>https://scifaro.com/en/abs/multi-style-training-for-south-african-call-centre-audio-2202.07219</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-style-training-for-south-african-call-centre-audio-2202.07219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-style-training-for-south-african-call-centre-audio-2202.07219"/></url>
<url><loc>https://scifaro.com/en/abs/speechpainter-text-conditioned-speech-inpainting-2202.07273</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechpainter-text-conditioned-speech-inpainting-2202.07273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechpainter-text-conditioned-speech-inpainting-2202.07273"/></url>
<url><loc>https://scifaro.com/en/abs/phase-vocoder-done-right-2202.07382</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-vocoder-done-right-2202.07382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-vocoder-done-right-2202.07382"/></url>
<url><loc>https://scifaro.com/en/abs/audio-inpainting-via-ell-1-minimization-and-dictionary-learning-2202.07479</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-inpainting-via-ell-1-minimization-and-dictionary-learning-2202.07479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-inpainting-via-ell-1-minimization-and-dictionary-learning-2202.07479"/></url>
<url><loc>https://scifaro.com/en/abs/phase-based-signal-representations-for-scattering-2202.07484</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-based-signal-representations-for-scattering-2202.07484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-based-signal-representations-for-scattering-2202.07484"/></url>
<url><loc>https://scifaro.com/en/abs/non-iterative-filter-bank-phase-re-construction-2202.07498</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-iterative-filter-bank-phase-re-construction-2202.07498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-iterative-filter-bank-phase-re-construction-2202.07498"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-in-the-waveform-domain-with-self-attention-2202.07790</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-in-the-waveform-domain-with-self-attention-2202.07790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-in-the-waveform-domain-with-self-attention-2202.07790"/></url>
<url><loc>https://scifaro.com/en/abs/learning-deep-direct-path-relative-transfer-function-for-binaural-sound-source-localization-2202.07841</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-deep-direct-path-relative-transfer-function-for-binaural-sound-source-localization-2202.07841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-deep-direct-path-relative-transfer-function-for-binaural-sound-source-localization-2202.07841"/></url>
<url><loc>https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-conversation-level-characteristics-2202.07855</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-conversation-level-characteristics-2202.07855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-conversation-level-characteristics-2202.07855"/></url>
<url><loc>https://scifaro.com/en/abs/srp-dnn-learning-direct-path-phase-difference-for-multiple-moving-sound-source-localization-2202.07859</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/srp-dnn-learning-direct-path-phase-difference-for-multiple-moving-sound-source-localization-2202.07859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/srp-dnn-learning-direct-path-phase-difference-for-multiple-moving-sound-source-localization-2202.07859"/></url>
<url><loc>https://scifaro.com/en/abs/singing-tacotron-global-duration-control-attention-and-dynamic-filter-for-end-to-end-singing-voice-synthesis-2202.07907</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-tacotron-global-duration-control-attention-and-dynamic-filter-for-end-to-end-singing-voice-synthesis-2202.07907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-tacotron-global-duration-control-attention-and-dynamic-filter-for-end-to-end-singing-voice-synthesis-2202.07907"/></url>
<url><loc>https://scifaro.com/en/abs/dbt-net-dual-branch-federative-magnitude-and-phase-estimation-with-attention-in-attention-transformer-for-monaural-speech-enhancement-2202.07931</loc><lastmod>2022-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dbt-net-dual-branch-federative-magnitude-and-phase-estimation-with-attention-in-attention-transformer-for-monaural-speech-enhancement-2202.07931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dbt-net-dual-branch-federative-magnitude-and-phase-estimation-with-attention-in-attention-transformer-for-monaural-speech-enhancement-2202.07931"/></url>
<url><loc>https://scifaro.com/en/abs/on-loss-functions-and-evaluation-metrics-for-music-source-separation-2202.07968</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-loss-functions-and-evaluation-metrics-for-music-source-separation-2202.07968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-loss-functions-and-evaluation-metrics-for-music-source-separation-2202.07968"/></url>
<url><loc>https://scifaro.com/en/abs/adima-abuse-detection-in-multilingual-audio-2202.07991</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adima-abuse-detection-in-multilingual-audio-2202.07991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adima-abuse-detection-in-multilingual-audio-2202.07991"/></url>
<url><loc>https://scifaro.com/en/abs/chord-conditioned-melody-harmonization-with-controllable-harmonicity-2202.08423</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chord-conditioned-melody-harmonization-with-controllable-harmonicity-2202.08423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chord-conditioned-melody-harmonization-with-controllable-harmonicity-2202.08423"/></url>
<url><loc>https://scifaro.com/en/abs/add-2022-the-first-audio-deep-synthesis-detection-challenge-2202.08433</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/add-2022-the-first-audio-deep-synthesis-detection-challenge-2202.08433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/add-2022-the-first-audio-deep-synthesis-detection-challenge-2202.08433"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-designing-compact-audio-visual-wake-word-spotting-system-based-on-iterative-fine-tuning-in-neural-network-pruning-2202.08509</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-designing-compact-audio-visual-wake-word-spotting-system-based-on-iterative-fine-tuning-in-neural-network-pruning-2202.08509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-designing-compact-audio-visual-wake-word-spotting-system-based-on-iterative-fine-tuning-in-neural-network-pruning-2202.08509"/></url>
<url><loc>https://scifaro.com/en/abs/remixit-continual-self-training-of-speech-enhancement-models-via-bootstrapped-remixing-2202.08862</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remixit-continual-self-training-of-speech-enhancement-models-via-bootstrapped-remixing-2202.08862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remixit-continual-self-training-of-speech-enhancement-models-via-bootstrapped-remixing-2202.08862"/></url>
<url><loc>https://scifaro.com/en/abs/word-embeddings-for-automatic-equalization-in-audio-mixing-2202.08898</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/word-embeddings-for-automatic-equalization-in-audio-mixing-2202.08898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/word-embeddings-for-automatic-equalization-in-audio-mixing-2202.08898"/></url>
<url><loc>https://scifaro.com/en/abs/attributable-watermarking-of-speech-generative-models-2202.08900</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attributable-watermarking-of-speech-generative-models-2202.08900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attributable-watermarking-of-speech-generative-models-2202.08900"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-emotion-recognition-using-transfer-learning-from-speaker-recognition-and-bert-based-models-2202.08974</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-using-transfer-learning-from-speaker-recognition-and-bert-based-models-2202.08974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-using-transfer-learning-from-speaker-recognition-and-bert-based-models-2202.08974"/></url>
<url><loc>https://scifaro.com/en/abs/a-summary-of-the-compare-covid-19-challenges-2202.08981</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-summary-of-the-compare-covid-19-challenges-2202.08981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-summary-of-the-compare-covid-19-challenges-2202.08981"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-sex-and-stroke-success-computer-aided-player-grunt-analysis-in-tennis-matches-2202.09102</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-sex-and-stroke-success-computer-aided-player-grunt-analysis-in-tennis-matches-2202.09102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-sex-and-stroke-success-computer-aided-player-grunt-analysis-in-tennis-matches-2202.09102"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-architectures-for-multi-pitch-estimation-towards-reliable-evaluation-2202.09198</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-architectures-for-multi-pitch-estimation-towards-reliable-evaluation-2202.09198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-architectures-for-multi-pitch-estimation-towards-reliable-evaluation-2202.09198"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-neuromorphic-spike-encoding-of-sound-using-information-theory-2202.09619</loc><lastmod>2023-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-neuromorphic-spike-encoding-of-sound-using-information-theory-2202.09619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-neuromorphic-spike-encoding-of-sound-using-information-theory-2202.09619"/></url>
<url><loc>https://scifaro.com/en/abs/it-s-raw-audio-generation-with-state-space-models-2202.09729</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/it-s-raw-audio-generation-with-state-space-models-2202.09729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/it-s-raw-audio-generation-with-state-space-models-2202.09729"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-affective-representations-of-music-induced-eeg-through-multimodal-supervision-and-latent-domain-adaptation-2202.09750</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-affective-representations-of-music-induced-eeg-through-multimodal-supervision-and-latent-domain-adaptation-2202.09750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-affective-representations-of-music-induced-eeg-through-multimodal-supervision-and-latent-domain-adaptation-2202.09750"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automatic-transcription-of-polyphonic-electric-guitar-music-a-new-dataset-and-a-multi-loss-transformer-model-2202.09907</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automatic-transcription-of-polyphonic-electric-guitar-music-a-new-dataset-and-a-multi-loss-transformer-model-2202.09907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automatic-transcription-of-polyphonic-electric-guitar-music-a-new-dataset-and-a-multi-loss-transformer-model-2202.09907"/></url>
<url><loc>https://scifaro.com/en/abs/campnet-context-aware-mask-prediction-for-end-to-end-text-based-speech-editing-2202.09950</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/campnet-context-aware-mask-prediction-for-end-to-end-text-based-speech-editing-2202.09950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/campnet-context-aware-mask-prediction-for-end-to-end-text-based-speech-editing-2202.09950"/></url>
<url><loc>https://scifaro.com/en/abs/avqvc-one-shot-voice-conversion-by-vector-quantization-with-applying-contrastive-learning-2202.10020</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avqvc-one-shot-voice-conversion-by-vector-quantization-with-applying-contrastive-learning-2202.10020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avqvc-one-shot-voice-conversion-by-vector-quantization-with-applying-contrastive-learning-2202.10020"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-on-speech-recognition-systems-for-mission-critical-applications-a-survey-2202.10594</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-on-speech-recognition-systems-for-mission-critical-applications-a-survey-2202.10594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-on-speech-recognition-systems-for-mission-critical-applications-a-survey-2202.10594"/></url>
<url><loc>https://scifaro.com/en/abs/nnspeech-speaker-guided-conditional-variational-autoencoder-for-zero-shot-multi-speaker-text-to-speech-2202.10712</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nnspeech-speaker-guided-conditional-variational-autoencoder-for-zero-shot-multi-speaker-text-to-speech-2202.10712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nnspeech-speaker-guided-conditional-variational-autoencoder-for-zero-shot-multi-speaker-text-to-speech-2202.10712"/></url>
<url><loc>https://scifaro.com/en/abs/improving-cross-lingual-speech-synthesis-with-triplet-training-scheme-2202.10729</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-cross-lingual-speech-synthesis-with-triplet-training-scheme-2202.10729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-cross-lingual-speech-synthesis-with-triplet-training-scheme-2202.10729"/></url>
<url><loc>https://scifaro.com/en/abs/sound-adversarial-audio-visual-navigation-2202.10910</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-adversarial-audio-visual-navigation-2202.10910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-adversarial-audio-visual-navigation-2202.10910"/></url>
<url><loc>https://scifaro.com/en/abs/drvc-a-framework-of-any-to-any-voice-conversion-with-self-supervised-learning-2202.10976</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drvc-a-framework-of-any-to-any-voice-conversion-with-self-supervised-learning-2202.10976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drvc-a-framework-of-any-to-any-voice-conversion-with-self-supervised-learning-2202.10976"/></url>
<url><loc>https://scifaro.com/en/abs/flowsense-monitoring-airflow-in-building-ventilation-systems-using-audio-sensing-2202.11136</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowsense-monitoring-airflow-in-building-ventilation-systems-using-audio-sensing-2202.11136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowsense-monitoring-airflow-in-building-ventilation-systems-using-audio-sensing-2202.11136"/></url>
<url><loc>https://scifaro.com/en/abs/towards-speaker-age-estimation-with-label-distribution-learning-2202.11424</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-speaker-age-estimation-with-label-distribution-learning-2202.11424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-speaker-age-estimation-with-label-distribution-learning-2202.11424"/></url>
<url><loc>https://scifaro.com/en/abs/listen-to-interpret-post-hoc-interpretability-for-audio-networks-with-nmf-2202.11479</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-to-interpret-post-hoc-interpretability-for-audio-networks-with-nmf-2202.11479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-to-interpret-post-hoc-interpretability-for-audio-networks-with-nmf-2202.11479"/></url>
<url><loc>https://scifaro.com/en/abs/differentially-private-speaker-anonymization-2202.11823</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentially-private-speaker-anonymization-2202.11823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentially-private-speaker-anonymization-2202.11823"/></url>
<url><loc>https://scifaro.com/en/abs/phase-continuity-learning-derivatives-of-phase-spectrum-for-speech-enhancement-2202.11918</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-continuity-learning-derivatives-of-phase-spectrum-for-speech-enhancement-2202.11918"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-continuity-learning-derivatives-of-phase-spectrum-for-speech-enhancement-2202.11918"/></url>
<url><loc>https://scifaro.com/en/abs/flat-latent-manifolds-for-human-machine-co-creation-of-music-2202.12243</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flat-latent-manifolds-for-human-machine-co-creation-of-music-2202.12243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flat-latent-manifolds-for-human-machine-co-creation-of-music-2202.12243"/></url>
<url><loc>https://scifaro.com/en/abs/a-perceptual-measure-for-evaluating-the-resynthesis-of-automatic-music-transcriptions-2202.12257</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-perceptual-measure-for-evaluating-the-resynthesis-of-automatic-music-transcriptions-2202.12257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-perceptual-measure-for-evaluating-the-resynthesis-of-automatic-music-transcriptions-2202.12257"/></url>
<url><loc>https://scifaro.com/en/abs/ask2mask-guided-data-selection-for-masked-speech-modeling-2202.12719</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ask2mask-guided-data-selection-for-masked-speech-modeling-2202.12719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ask2mask-guided-data-selection-for-masked-speech-modeling-2202.12719"/></url>
<url><loc>https://scifaro.com/en/abs/language-independent-speaker-anonymization-approach-using-self-supervised-pre-trained-models-2202.13097</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-independent-speaker-anonymization-approach-using-self-supervised-pre-trained-models-2202.13097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-independent-speaker-anonymization-approach-using-self-supervised-pre-trained-models-2202.13097"/></url>
<url><loc>https://scifaro.com/en/abs/an-acoustic-signal-cavitation-detection-framework-based-on-xgboost-with-adaptive-selection-feature-engineering-2202.13226</loc><lastmod>2022-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-acoustic-signal-cavitation-detection-framework-based-on-xgboost-with-adaptive-selection-feature-engineering-2202.13226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-acoustic-signal-cavitation-detection-framework-based-on-xgboost-with-adaptive-selection-feature-engineering-2202.13226"/></url>
<url><loc>https://scifaro.com/en/abs/regional-local-adversarially-learned-one-class-classifier-anomalous-sound-detection-in-global-long-term-space-2202.13245</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regional-local-adversarially-learned-one-class-classifier-anomalous-sound-detection-in-global-long-term-space-2202.13245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regional-local-adversarially-learned-one-class-classifier-anomalous-sound-detection-in-global-long-term-space-2202.13245"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-linear-dynamical-system-for-representing-notes-from-recorded-audio-2202.13255</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-linear-dynamical-system-for-representing-notes-from-recorded-audio-2202.13255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-linear-dynamical-system-for-representing-notes-from-recorded-audio-2202.13255"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-identification-2202.13865</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-identification-2202.13865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-identification-2202.13865"/></url>
<url><loc>https://scifaro.com/en/abs/extended-graph-temporal-classification-for-multi-speaker-end-to-end-asr-2203.00232</loc><lastmod>2022-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extended-graph-temporal-classification-for-multi-speaker-end-to-end-asr-2203.00232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extended-graph-temporal-classification-for-multi-speaker-end-to-end-asr-2203.00232"/></url>
<url><loc>https://scifaro.com/en/abs/dmf-net-a-decoupling-style-multi-band-fusion-model-for-full-band-speech-enhancement-2203.00472</loc><lastmod>2022-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dmf-net-a-decoupling-style-multi-band-fusion-model-for-full-band-speech-enhancement-2203.00472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dmf-net-a-decoupling-style-multi-band-fusion-model-for-full-band-speech-enhancement-2203.00472"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-several-parameterizations-for-speaker-recognition-2203.00513</loc><lastmod>2022-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-several-parameterizations-for-speaker-recognition-2203.00513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-several-parameterizations-for-speaker-recognition-2203.00513"/></url>
<url><loc>https://scifaro.com/en/abs/a-conformer-based-acoustic-model-for-robust-automatic-speech-recognition-2203.00725</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-conformer-based-acoustic-model-for-robust-automatic-speech-recognition-2203.00725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-conformer-based-acoustic-model-for-robust-automatic-speech-recognition-2203.00725"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaption-with-intuitive-prosodic-features-for-statistical-parametric-speech-synthesis-2203.00951</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaption-with-intuitive-prosodic-features-for-statistical-parametric-speech-synthesis-2203.00951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaption-with-intuitive-prosodic-features-for-statistical-parametric-speech-synthesis-2203.00951"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-scale-time-frequency-spectrogram-discriminator-for-gan-based-non-autoregressive-tts-2203.01080</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-scale-time-frequency-spectrogram-discriminator-for-gan-based-non-autoregressive-tts-2203.01080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-scale-time-frequency-spectrogram-discriminator-for-gan-based-non-autoregressive-tts-2203.01080"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-task-learning-for-cavitation-detection-and-cavitation-intensity-recognition-of-valve-acoustic-signals-2203.01118</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-task-learning-for-cavitation-detection-and-cavitation-intensity-recognition-of-valve-acoustic-signals-2203.01118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-task-learning-for-cavitation-detection-and-cavitation-intensity-recognition-of-valve-acoustic-signals-2203.01118"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-improvement-using-blind-inversion-of-distortions-2203.01164</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-improvement-using-blind-inversion-of-distortions-2203.01164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-improvement-using-blind-inversion-of-distortions-2203.01164"/></url>
<url><loc>https://scifaro.com/en/abs/audio-self-supervised-learning-a-survey-2203.01205</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-self-supervised-learning-a-survey-2203.01205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-self-supervised-learning-a-survey-2203.01205"/></url>
<url><loc>https://scifaro.com/en/abs/smtnet-hierarchical-cavitation-intensity-recognition-based-on-sub-main-transfer-network-2203.01429</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smtnet-hierarchical-cavitation-intensity-recognition-based-on-sub-main-transfer-network-2203.01429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smtnet-hierarchical-cavitation-intensity-recognition-based-on-sub-main-transfer-network-2203.01429"/></url>
<url><loc>https://scifaro.com/en/abs/generative-modeling-for-low-dimensional-speech-attributes-with-neural-spline-flows-2203.01786</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-modeling-for-low-dimensional-speech-attributes-with-neural-spline-flows-2203.01786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-modeling-for-low-dimensional-speech-attributes-with-neural-spline-flows-2203.01786"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-predictive-models-computation-in-adpcm-schemes-2203.02020</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-predictive-models-computation-in-adpcm-schemes-2203.02020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-predictive-models-computation-in-adpcm-schemes-2203.02020"/></url>
<url><loc>https://scifaro.com/en/abs/look-listen-multi-modal-correlation-learning-for-active-speaker-detection-and-speech-enhancement-2203.02216</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/look-listen-multi-modal-correlation-learning-for-active-speaker-detection-and-speech-enhancement-2203.02216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/look-listen-multi-modal-correlation-learning-for-active-speaker-detection-and-speech-enhancement-2203.02216"/></url>
<url><loc>https://scifaro.com/en/abs/istftnet-fast-and-lightweight-mel-spectrogram-vocoder-incorporating-inverse-short-time-fourier-transform-2203.02395</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/istftnet-fast-and-lightweight-mel-spectrogram-vocoder-incorporating-inverse-short-time-fourier-transform-2203.02395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/istftnet-fast-and-lightweight-mel-spectrogram-vocoder-incorporating-inverse-short-time-fourier-transform-2203.02395"/></url>
<url><loc>https://scifaro.com/en/abs/ontological-learning-from-weak-labels-2203.02483</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ontological-learning-from-weak-labels-2203.02483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ontological-learning-from-weak-labels-2203.02483"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-separation-based-on-joint-feature-representation-with-cross-modal-attention-2203.02655</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-separation-based-on-joint-feature-representation-with-cross-modal-attention-2203.02655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-separation-based-on-joint-feature-representation-with-cross-modal-attention-2203.02655"/></url>
<url><loc>https://scifaro.com/en/abs/neuraldps-neural-deterministic-plus-stochastic-model-with-multiband-excitation-for-noise-controllable-waveform-generation-2203.02678</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuraldps-neural-deterministic-plus-stochastic-model-with-multiband-excitation-for-noise-controllable-waveform-generation-2203.02678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuraldps-neural-deterministic-plus-stochastic-model-with-multiband-excitation-for-noise-controllable-waveform-generation-2203.02678"/></url>
<url><loc>https://scifaro.com/en/abs/single-microphone-speaker-extraction-using-unified-time-frequency-siamese-unet-2203.02941</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-microphone-speaker-extraction-using-unified-time-frequency-siamese-unet-2203.02941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-microphone-speaker-extraction-using-unified-time-frequency-siamese-unet-2203.02941"/></url>
<url><loc>https://scifaro.com/en/abs/c-p-map-a-novel-evaluation-toolkit-for-speaker-verification-2203.02942</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/c-p-map-a-novel-evaluation-toolkit-for-speaker-verification-2203.02942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/c-p-map-a-novel-evaluation-toolkit-for-speaker-verification-2203.02942"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-self-attention-voice-activity-detector-2203.02944</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-self-attention-voice-activity-detector-2203.02944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-self-attention-voice-activity-detector-2203.02944"/></url>
<url><loc>https://scifaro.com/en/abs/variational-auto-encoder-based-mandarin-speech-cloning-2203.02967</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-auto-encoder-based-mandarin-speech-cloning-2203.02967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-auto-encoder-based-mandarin-speech-cloning-2203.02967"/></url>
<url><loc>https://scifaro.com/en/abs/hear-holistic-evaluation-of-audio-representations-2203.03022</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hear-holistic-evaluation-of-audio-representations-2203.03022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hear-holistic-evaluation-of-audio-representations-2203.03022"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-by-means-of-a-combination-of-linear-and-nonlinear-predictive-models-2203.03190</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-by-means-of-a-combination-of-linear-and-nonlinear-predictive-models-2203.03190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-by-means-of-a-combination-of-linear-and-nonlinear-predictive-models-2203.03190"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-region-of-interest-roi-detection-for-speech-emotion-recognition-2203.03428</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-region-of-interest-roi-detection-for-speech-emotion-recognition-2203.03428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-region-of-interest-roi-detection-for-speech-emotion-recognition-2203.03428"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-ai-synthesized-hindi-speech-2203.03706</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-ai-synthesized-hindi-speech-2203.03706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-ai-synthesized-hindi-speech-2203.03706"/></url>
<url><loc>https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-incorporating-the-characteristics-of-speech-2203.03812</loc><lastmod>2022-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-incorporating-the-characteristics-of-speech-2203.03812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-incorporating-the-characteristics-of-speech-2203.03812"/></url>
<url><loc>https://scifaro.com/en/abs/digital-speech-algorithms-for-speaker-de-identification-2203.03932</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-speech-algorithms-for-speaker-de-identification-2203.03932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-speech-algorithms-for-speaker-de-identification-2203.03932"/></url>
<url><loc>https://scifaro.com/en/abs/vovit-low-latency-graph-based-audio-visual-voice-separation-transformer-2203.04099</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vovit-low-latency-graph-based-audio-visual-voice-separation-transformer-2203.04099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vovit-low-latency-graph-based-audio-visual-voice-separation-transformer-2203.04099"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-experiments-under-gender-de-identification-2203.04638</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-experiments-under-gender-de-identification-2203.04638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-experiments-under-gender-de-identification-2203.04638"/></url>
<url><loc>https://scifaro.com/en/abs/robust-federated-learning-against-adversarial-attacks-for-speech-emotion-recognition-2203.04696</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-federated-learning-against-adversarial-attacks-for-speech-emotion-recognition-2203.04696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-federated-learning-against-adversarial-attacks-for-speech-emotion-recognition-2203.04696"/></url>
<url><loc>https://scifaro.com/en/abs/an-environmental-feature-representation-in-i-vector-space-for-room-verification-and-metadata-estimation-2203.04880</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-environmental-feature-representation-in-i-vector-space-for-room-verification-and-metadata-estimation-2203.04880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-environmental-feature-representation-in-i-vector-space-for-room-verification-and-metadata-estimation-2203.04880"/></url>
<url><loc>https://scifaro.com/en/abs/eaceleb-an-east-asian-language-speaking-celebrity-dataset-for-speaker-recognition-2203.05333</loc><lastmod>2022-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eaceleb-an-east-asian-language-speaking-celebrity-dataset-for-speaker-recognition-2203.05333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eaceleb-an-east-asian-language-speaking-celebrity-dataset-for-speaker-recognition-2203.05333"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-free-attentive-scoring-for-speaker-verification-2203.05642</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-free-attentive-scoring-for-speaker-verification-2203.05642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-free-attentive-scoring-for-speaker-verification-2203.05642"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-transferability-of-speech-separation-by-meta-learning-2203.05882</loc><lastmod>2022-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-transferability-of-speech-separation-by-meta-learning-2203.05882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-transferability-of-speech-separation-by-meta-learning-2203.05882"/></url>
<url><loc>https://scifaro.com/en/abs/deep-convolutional-neural-network-for-roadway-incident-surveillance-using-audio-data-2203.06059</loc><lastmod>2022-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-for-roadway-incident-surveillance-using-audio-data-2203.06059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-convolutional-neural-network-for-roadway-incident-surveillance-using-audio-data-2203.06059"/></url>
<url><loc>https://scifaro.com/en/abs/climate-change-computer-audition-a-call-to-action-and-overview-on-audio-intelligence-to-help-save-the-planet-2203.06064</loc><lastmod>2022-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/climate-change-computer-audition-a-call-to-action-and-overview-on-audio-intelligence-to-help-save-the-planet-2203.06064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/climate-change-computer-audition-a-call-to-action-and-overview-on-audio-intelligence-to-help-save-the-planet-2203.06064"/></url>
<url><loc>https://scifaro.com/en/abs/infrastructure-free-deep-learned-urban-noise-monitoring-at-sim-100mw-2203.06220</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infrastructure-free-deep-learned-urban-noise-monitoring-at-sim-100mw-2203.06220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infrastructure-free-deep-learned-urban-noise-monitoring-at-sim-100mw-2203.06220"/></url>
<url><loc>https://scifaro.com/en/abs/sa-sasv-an-end-to-end-spoof-aggregated-spoofing-aware-speaker-verification-system-2203.06517</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-sasv-an-end-to-end-spoof-aggregated-spoofing-aware-speaker-verification-system-2203.06517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-sasv-an-end-to-end-spoof-aggregated-spoofing-aware-speaker-verification-system-2203.06517"/></url>
<url><loc>https://scifaro.com/en/abs/bi-sampling-approach-to-classify-music-mood-leveraging-raga-rasa-association-in-indian-classical-music-2203.06583</loc><lastmod>2022-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bi-sampling-approach-to-classify-music-mood-leveraging-raga-rasa-association-in-indian-classical-music-2203.06583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bi-sampling-approach-to-classify-music-mood-leveraging-raga-rasa-association-in-indian-classical-music-2203.06583"/></url>
<url><loc>https://scifaro.com/en/abs/cmkd-cnn-transformer-based-cross-model-knowledge-distillation-for-audio-classification-2203.06760</loc><lastmod>2022-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cmkd-cnn-transformer-based-cross-model-knowledge-distillation-for-audio-classification-2203.06760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cmkd-cnn-transformer-based-cross-model-knowledge-distillation-for-audio-classification-2203.06760"/></url>
<url><loc>https://scifaro.com/en/abs/mdnet-learning-monaural-speech-enhancement-from-deep-prior-gradient-2203.07179</loc><lastmod>2022-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdnet-learning-monaural-speech-enhancement-from-deep-prior-gradient-2203.07179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdnet-learning-monaural-speech-enhancement-from-deep-prior-gradient-2203.07179"/></url>
<url><loc>https://scifaro.com/en/abs/taylorbeamformer-learning-all-neural-beamformer-for-multi-channel-speech-enhancement-from-taylor-s-approximation-theory-2203.07195</loc><lastmod>2022-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taylorbeamformer-learning-all-neural-beamformer-for-multi-channel-speech-enhancement-from-taylor-s-approximation-theory-2203.07195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taylorbeamformer-learning-all-neural-beamformer-for-multi-channel-speech-enhancement-from-taylor-s-approximation-theory-2203.07195"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-unimodal-self-supervised-learning-for-multimodal-audio-visual-speech-recognition-2203.07996</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-unimodal-self-supervised-learning-for-multimodal-audio-visual-speech-recognition-2203.07996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-unimodal-self-supervised-learning-for-multimodal-audio-visual-speech-recognition-2203.07996"/></url>
<url><loc>https://scifaro.com/en/abs/can-a-neural-network-hear-the-shape-of-a-drum-2203.08073</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-a-neural-network-hear-the-shape-of-a-drum-2203.08073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-a-neural-network-hear-the-shape-of-a-drum-2203.08073"/></url>
<url><loc>https://scifaro.com/en/abs/instance-level-loss-based-multiple-instance-learning-framework-for-acoustic-scene-classification-2203.08439</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instance-level-loss-based-multiple-instance-learning-framework-for-acoustic-scene-classification-2203.08439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instance-level-loss-based-multiple-instance-learning-framework-for-acoustic-scene-classification-2203.08439"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-representations-with-mlps-2203.08490</loc><lastmod>2022-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-representations-with-mlps-2203.08490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-representations-with-mlps-2203.08490"/></url>
<url><loc>https://scifaro.com/en/abs/tms-a-temporal-multi-scale-backbone-design-for-speaker-embedding-2203.09098</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tms-a-temporal-multi-scale-backbone-design-for-speaker-embedding-2203.09098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tms-a-temporal-multi-scale-backbone-design-for-speaker-embedding-2203.09098"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-with-positive-negative-frame-mask-for-music-representation-2203.09129</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-with-positive-negative-frame-mask-for-music-representation-2203.09129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-with-positive-negative-frame-mask-for-music-representation-2203.09129"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-of-speech-intelligibility-with-dnn-based-performance-measures-2203.09148</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-of-speech-intelligibility-with-dnn-based-performance-measures-2203.09148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-of-speech-intelligibility-with-dnn-based-performance-measures-2203.09148"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-using-residual-signal-of-linear-and-nonlinear-prediction-models-2203.09231</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-using-residual-signal-of-linear-and-nonlinear-prediction-models-2203.09231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-using-residual-signal-of-linear-and-nonlinear-prediction-models-2203.09231"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-progress-of-parkinson-s-disease-using-acoustic-analysis-of-phonation-2203.09295</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-progress-of-parkinson-s-disease-using-acoustic-analysis-of-phonation-2203.09295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-progress-of-parkinson-s-disease-using-acoustic-analysis-of-phonation-2203.09295"/></url>
<url><loc>https://scifaro.com/en/abs/robust-and-complex-approach-of-pathological-speech-signal-analysis-2203.09402</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-and-complex-approach-of-pathological-speech-signal-analysis-2203.09402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-and-complex-approach-of-pathological-speech-signal-analysis-2203.09402"/></url>
<url><loc>https://scifaro.com/en/abs/improve-few-shot-voice-cloning-using-multi-modal-learning-2203.09708</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improve-few-shot-voice-cloning-using-multi-modal-learning-2203.09708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improve-few-shot-voice-cloning-using-multi-modal-learning-2203.09708"/></url>
<url><loc>https://scifaro.com/en/abs/dgc-vector-a-new-speaker-embedding-for-zero-shot-voice-conversion-2203.09722</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dgc-vector-a-new-speaker-embedding-for-zero-shot-voice-conversion-2203.09722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dgc-vector-a-new-speaker-embedding-for-zero-shot-voice-conversion-2203.09722"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-an-efficient-framework-for-overlapping-speech-diarization-in-meeting-scenarios-2203.09767</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-an-efficient-framework-for-overlapping-speech-diarization-in-meeting-scenarios-2203.09767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embedding-aware-neural-diarization-an-efficient-framework-for-overlapping-speech-diarization-in-meeting-scenarios-2203.09767"/></url>
<url><loc>https://scifaro.com/en/abs/adavocoder-adaptive-vocoder-for-custom-voice-2203.09825</loc><lastmod>2023-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adavocoder-adaptive-vocoder-for-custom-voice-2203.09825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adavocoder-adaptive-vocoder-for-custom-voice-2203.09825"/></url>
<url><loc>https://scifaro.com/en/abs/neural-predictor-for-black-box-adversarial-attacks-on-speech-recognition-2203.09849</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-predictor-for-black-box-adversarial-attacks-on-speech-recognition-2203.09849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-predictor-for-black-box-adversarial-attacks-on-speech-recognition-2203.09849"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-analysis-of-categorical-verbal-fluency-for-mild-cognitive-impartment-detection-a-non-linear-language-independent-approach-2203.09878</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-analysis-of-categorical-verbal-fluency-for-mild-cognitive-impartment-detection-a-non-linear-language-independent-approach-2203.09878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-analysis-of-categorical-verbal-fluency-for-mild-cognitive-impartment-detection-a-non-linear-language-independent-approach-2203.09878"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-hypokinetic-dysarthria-using-acoustic-analysis-of-poem-recitation-2203.09880</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-hypokinetic-dysarthria-using-acoustic-analysis-of-poem-recitation-2203.09880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-hypokinetic-dysarthria-using-acoustic-analysis-of-poem-recitation-2203.09880"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-instrument-agnostic-model-for-polyphonic-note-transcription-and-multipitch-estimation-2203.09893</loc><lastmod>2022-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-instrument-agnostic-model-for-polyphonic-note-transcription-and-multipitch-estimation-2203.09893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-instrument-agnostic-model-for-polyphonic-note-transcription-and-multipitch-estimation-2203.09893"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-filled-pause-generation-with-group-wise-prediction-models-2203.09961</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-filled-pause-generation-with-group-wise-prediction-models-2203.09961"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-filled-pause-generation-with-group-wise-prediction-models-2203.09961"/></url>
<url><loc>https://scifaro.com/en/abs/ross-utilizing-robotic-rotation-for-audio-source-separation-2203.10072</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ross-utilizing-robotic-rotation-for-audio-source-separation-2203.10072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ross-utilizing-robotic-rotation-for-audio-source-separation-2203.10072"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-role-of-lip-articulation-in-visual-speech-perception-2203.10117</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-role-of-lip-articulation-in-visual-speech-perception-2203.10117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-role-of-lip-articulation-in-visual-speech-perception-2203.10117"/></url>
<url><loc>https://scifaro.com/en/abs/a-track-wise-ensemble-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2203.10228</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-track-wise-ensemble-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2203.10228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-track-wise-ensemble-event-independent-network-for-polyphonic-sound-event-localization-and-detection-2203.10228"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-robustness-to-perturbations-for-representations-of-environmental-sound-2203.10425</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-robustness-to-perturbations-for-representations-of-environmental-sound-2203.10425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-robustness-to-perturbations-for-representations-of-environmental-sound-2203.10425"/></url>
<url><loc>https://scifaro.com/en/abs/ecapa-tdnn-for-multi-speaker-text-to-speech-synthesis-2203.10473</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecapa-tdnn-for-multi-speaker-text-to-speech-synthesis-2203.10473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecapa-tdnn-for-multi-speaker-text-to-speech-synthesis-2203.10473"/></url>
<url><loc>https://scifaro.com/en/abs/wesinger-data-augmented-singing-voice-synthesis-with-auxiliary-losses-2203.10750</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wesinger-data-augmented-singing-voice-synthesis-with-auxiliary-losses-2203.10750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wesinger-data-augmented-singing-voice-synthesis-with-auxiliary-losses-2203.10750"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-spoof-speech-detection-based-on-res2net-with-phase-network-2203.10793</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-spoof-speech-detection-based-on-res2net-with-phase-network-2203.10793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-spoof-speech-detection-based-on-res2net-with-phase-network-2203.10793"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-features-as-markers-of-parkinson-s-disease-the-issue-of-clinical-interpretability-2203.10830</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-features-as-markers-of-parkinson-s-disease-the-issue-of-clinical-interpretability-2203.10830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-features-as-markers-of-parkinson-s-disease-the-issue-of-clinical-interpretability-2203.10830"/></url>
<url><loc>https://scifaro.com/en/abs/multi-class-versus-one-class-classifier-in-spontaneous-speech-analysis-oriented-to-alzheimer-disease-diagnosis-2203.10837</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-class-versus-one-class-classifier-in-spontaneous-speech-analysis-oriented-to-alzheimer-disease-diagnosis-2203.10837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-class-versus-one-class-classifier-in-spontaneous-speech-analysis-oriented-to-alzheimer-disease-diagnosis-2203.10837"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-aware-speaker-verification-with-unsupervised-domain-adaptation-2203.10992</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-with-unsupervised-domain-adaptation-2203.10992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-with-unsupervised-domain-adaptation-2203.10992"/></url>
<url><loc>https://scifaro.com/en/abs/autotts-end-to-end-text-to-speech-synthesis-through-differentiable-duration-modeling-2203.11049</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autotts-end-to-end-text-to-speech-synthesis-through-differentiable-duration-modeling-2203.11049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autotts-end-to-end-text-to-speech-synthesis-through-differentiable-duration-modeling-2203.11049"/></url>
<url><loc>https://scifaro.com/en/abs/individualizing-head-related-transfer-functions-for-binaural-acoustic-applications-2203.11138</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/individualizing-head-related-transfer-functions-for-binaural-acoustic-applications-2203.11138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/individualizing-head-related-transfer-functions-for-binaural-acoustic-applications-2203.11138"/></url>
<url><loc>https://scifaro.com/en/abs/automated-detection-of-foreground-speech-with-wearable-sensing-in-everyday-home-environments-a-transfer-learning-approach-2203.11294</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-detection-of-foreground-speech-with-wearable-sensing-in-everyday-home-environments-a-transfer-learning-approach-2203.11294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-detection-of-foreground-speech-with-wearable-sensing-in-everyday-home-environments-a-transfer-learning-approach-2203.11294"/></url>
<url><loc>https://scifaro.com/en/abs/the-voicemos-challenge-2022-2203.11389</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voicemos-challenge-2022-2203.11389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voicemos-challenge-2022-2203.11389"/></url>
<url><loc>https://scifaro.com/en/abs/residual-guided-non-intrusive-speech-quality-assessment-2203.11499</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-guided-non-intrusive-speech-quality-assessment-2203.11499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-guided-non-intrusive-speech-quality-assessment-2203.11499"/></url>
<url><loc>https://scifaro.com/en/abs/a-text-to-speech-pipeline-evaluation-methodology-and-initial-fine-tuning-results-for-child-speech-synthesis-2203.11562</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-text-to-speech-pipeline-evaluation-methodology-and-initial-fine-tuning-results-for-child-speech-synthesis-2203.11562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-text-to-speech-pipeline-evaluation-methodology-and-initial-fine-tuning-results-for-child-speech-synthesis-2203.11562"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-generative-data-augmentation-for-clinical-audio-datasets-2203.11570</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-generative-data-augmentation-for-clinical-audio-datasets-2203.11570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-generative-data-augmentation-for-clinical-audio-datasets-2203.11570"/></url>
<url><loc>https://scifaro.com/en/abs/ct-sat-contextual-transformer-for-sequential-audio-tagging-2203.11573</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ct-sat-contextual-transformer-for-sequential-audio-tagging-2203.11573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ct-sat-contextual-transformer-for-sequential-audio-tagging-2203.11573"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-disfluencies-for-automatic-detection-of-mild-cognitive-impartment-a-deep-learning-approach-2203.11606</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-disfluencies-for-automatic-detection-of-mild-cognitive-impartment-a-deep-learning-approach-2203.11606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-disfluencies-for-automatic-detection-of-mild-cognitive-impartment-a-deep-learning-approach-2203.11606"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-prediction-with-neural-nets-in-adpcm-2203.11612</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-prediction-with-neural-nets-in-adpcm-2203.11612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-prediction-with-neural-nets-in-adpcm-2203.11612"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-with-a-mlp-classifier-and-lpcc-codebook-2203.11614</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-with-a-mlp-classifier-and-lpcc-codebook-2203.11614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-with-a-mlp-classifier-and-lpcc-codebook-2203.11614"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-speaker-age-and-height-from-speech-signal-using-bi-encoder-transformer-mixture-model-2203.11774</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-speaker-age-and-height-from-speech-signal-using-bi-encoder-transformer-mixture-model-2203.11774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-speaker-age-and-height-from-speech-signal-using-bi-encoder-transformer-mixture-model-2203.11774"/></url>
<url><loc>https://scifaro.com/en/abs/federated-self-supervised-learning-for-acoustic-event-classification-2203.11997</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-self-supervised-learning-for-acoustic-event-classification-2203.11997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-self-supervised-learning-for-acoustic-event-classification-2203.11997"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-using-an-lstm-2203.12105</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-using-an-lstm-2203.12105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-using-an-lstm-2203.12105"/></url>
<url><loc>https://scifaro.com/en/abs/on-adversarial-robustness-of-large-scale-audio-visual-learning-2203.12122</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-adversarial-robustness-of-large-scale-audio-visual-learning-2203.12122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-adversarial-robustness-of-large-scale-audio-visual-learning-2203.12122"/></url>
<url><loc>https://scifaro.com/en/abs/fullsubnet-channel-attention-fullsubnet-with-complex-spectrograms-for-speech-enhancement-2203.12188</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fullsubnet-channel-attention-fullsubnet-with-complex-spectrograms-for-speech-enhancement-2203.12188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fullsubnet-channel-attention-fullsubnet-with-complex-spectrograms-for-speech-enhancement-2203.12188"/></url>
<url><loc>https://scifaro.com/en/abs/towards-expressive-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2203.12201</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-expressive-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2203.12201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-expressive-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2203.12201"/></url>
<url><loc>https://scifaro.com/en/abs/quantitative-evaluation-approach-for-translation-of-perceptual-soundscape-attributes-initial-application-to-the-thai-language-2203.12245</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantitative-evaluation-approach-for-translation-of-perceptual-soundscape-attributes-initial-application-to-the-thai-language-2203.12245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantitative-evaluation-approach-for-translation-of-perceptual-soundscape-attributes-initial-application-to-the-thai-language-2203.12245"/></url>
<url><loc>https://scifaro.com/en/abs/a-combination-between-vq-and-covariance-matrices-for-speaker-recognition-2203.12306</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-combination-between-vq-and-covariance-matrices-for-speaker-recognition-2203.12306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-combination-between-vq-and-covariance-matrices-for-speaker-recognition-2203.12306"/></url>
<url><loc>https://scifaro.com/en/abs/wider-or-deeper-neural-network-architecture-for-acoustic-scene-classification-with-mismatched-recording-devices-2203.12314</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wider-or-deeper-neural-network-architecture-for-acoustic-scene-classification-with-mismatched-recording-devices-2203.12314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wider-or-deeper-neural-network-architecture-for-acoustic-scene-classification-with-mismatched-recording-devices-2203.12314"/></url>
<url><loc>https://scifaro.com/en/abs/metricgan-increasing-robustness-of-noise-reduction-on-unseen-data-2203.12369</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metricgan-increasing-robustness-of-noise-reduction-on-unseen-data-2203.12369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metricgan-increasing-robustness-of-noise-reduction-on-unseen-data-2203.12369"/></url>
<url><loc>https://scifaro.com/en/abs/an-interactive-music-infilling-interface-for-pop-music-composition-2203.12736</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-interactive-music-infilling-interface-for-pop-music-composition-2203.12736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-interactive-music-infilling-interface-for-pop-music-composition-2203.12736"/></url>
<url><loc>https://scifaro.com/en/abs/disentangleing-content-and-fine-grained-prosody-information-via-hybrid-asr-bottleneck-features-for-voice-conversion-2203.12813</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangleing-content-and-fine-grained-prosody-information-via-hybrid-asr-bottleneck-features-for-voice-conversion-2203.12813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangleing-content-and-fine-grained-prosody-information-via-hybrid-asr-bottleneck-features-for-voice-conversion-2203.12813"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-subband-non-linear-prediction-coding-algorithm-for-narrowband-speech-signal-the-nadpcmb-mlt-coding-scheme-2203.12894</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-subband-non-linear-prediction-coding-algorithm-for-narrowband-speech-signal-the-nadpcmb-mlt-coding-scheme-2203.12894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-subband-non-linear-prediction-coding-algorithm-for-narrowband-speech-signal-the-nadpcmb-mlt-coding-scheme-2203.12894"/></url>
<url><loc>https://scifaro.com/en/abs/wide-band-sub-band-speech-coding-using-nonlinear-prediction-2203.12896</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wide-band-sub-band-speech-coding-using-nonlinear-prediction-2203.12896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wide-band-sub-band-speech-coding-using-nonlinear-prediction-2203.12896"/></url>
<url><loc>https://scifaro.com/en/abs/selfremaster-self-supervised-speech-restoration-with-analysis-by-synthesis-approach-using-channel-modeling-2203.12937</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selfremaster-self-supervised-speech-restoration-with-analysis-by-synthesis-approach-using-channel-modeling-2203.12937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selfremaster-self-supervised-speech-restoration-with-analysis-by-synthesis-approach-using-channel-modeling-2203.12937"/></url>
<url><loc>https://scifaro.com/en/abs/score-difficulty-analysis-for-piano-performance-education-based-on-fingering-2203.13010</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-difficulty-analysis-for-piano-performance-education-based-on-fingering-2203.13010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-difficulty-analysis-for-piano-performance-education-based-on-fingering-2203.13010"/></url>
<url><loc>https://scifaro.com/en/abs/bailando-3d-dance-generation-by-actor-critic-gpt-with-choreographic-memory-2203.13055</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bailando-3d-dance-generation-by-actor-critic-gpt-with-choreographic-memory-2203.13055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bailando-3d-dance-generation-by-actor-critic-gpt-with-choreographic-memory-2203.13055"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-a-unified-framework-for-bandwidth-extension-and-speech-enhancement-2203.13086</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-a-unified-framework-for-bandwidth-extension-and-speech-enhancement-2203.13086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-a-unified-framework-for-bandwidth-extension-and-speech-enhancement-2203.13086"/></url>
<url><loc>https://scifaro.com/en/abs/midiverto-a-web-application-to-visualize-tonality-in-real-time-2203.13158</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midiverto-a-web-application-to-visualize-tonality-in-real-time-2203.13158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midiverto-a-web-application-to-visualize-tonality-in-real-time-2203.13158"/></url>
<url><loc>https://scifaro.com/en/abs/complex-frequency-domain-linear-prediction-a-tool-to-compute-modulation-spectrum-of-speech-2203.13216</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-frequency-domain-linear-prediction-a-tool-to-compute-modulation-spectrum-of-speech-2203.13216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-frequency-domain-linear-prediction-a-tool-to-compute-modulation-spectrum-of-speech-2203.13216"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-time-audiovisual-fusion-with-recurrence-vs-attention-for-in-the-wild-affect-recognition-2203.13285</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-time-audiovisual-fusion-with-recurrence-vs-attention-for-in-the-wild-affect-recognition-2203.13285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-time-audiovisual-fusion-with-recurrence-vs-attention-for-in-the-wild-affect-recognition-2203.13285"/></url>
<url><loc>https://scifaro.com/en/abs/audiotagging-done-right-2nd-comparison-of-deep-learning-methods-for-environmental-sound-classification-2203.13448</loc><lastmod>2022-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiotagging-done-right-2nd-comparison-of-deep-learning-methods-for-environmental-sound-classification-2203.13448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiotagging-done-right-2nd-comparison-of-deep-learning-methods-for-environmental-sound-classification-2203.13448"/></url>
<url><loc>https://scifaro.com/en/abs/wavefuzz-a-clean-label-poisoning-attack-to-protect-your-voice-2203.13497</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavefuzz-a-clean-label-poisoning-attack-to-protect-your-voice-2203.13497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavefuzz-a-clean-label-poisoning-attack-to-protect-your-voice-2203.13497"/></url>
<url><loc>https://scifaro.com/en/abs/delores-decorrelating-latent-spaces-for-low-resource-audio-representation-learning-2203.13628</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delores-decorrelating-latent-spaces-for-low-resource-audio-representation-learning-2203.13628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delores-decorrelating-latent-spaces-for-low-resource-audio-representation-learning-2203.13628"/></url>
<url><loc>https://scifaro.com/en/abs/audio-text-retrieval-in-context-2203.13645</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-text-retrieval-in-context-2203.13645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-text-retrieval-in-context-2203.13645"/></url>
<url><loc>https://scifaro.com/en/abs/chain-based-discriminative-autoencoders-for-speech-recognition-2203.13687</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chain-based-discriminative-autoencoders-for-speech-recognition-2203.13687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chain-based-discriminative-autoencoders-for-speech-recognition-2203.13687"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhanced-and-noise-aware-networks-for-robust-speech-recognition-2203.13696</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhanced-and-noise-aware-networks-for-robust-speech-recognition-2203.13696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhanced-and-noise-aware-networks-for-robust-speech-recognition-2203.13696"/></url>
<url><loc>https://scifaro.com/en/abs/smp-phat-lightweight-doa-estimation-by-merging-microphone-pairs-2203.14409</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smp-phat-lightweight-doa-estimation-by-merging-microphone-pairs-2203.14409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smp-phat-lightweight-doa-estimation-by-merging-microphone-pairs-2203.14409"/></url>
<url><loc>https://scifaro.com/en/abs/subjective-evaluation-of-deep-learning-models-for-symbolic-music-composition-2203.14641</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subjective-evaluation-of-deep-learning-models-for-symbolic-music-composition-2203.14641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subjective-evaluation-of-deep-learning-models-for-symbolic-music-composition-2203.14641"/></url>
<url><loc>https://scifaro.com/en/abs/training-speaker-recognition-systems-with-limited-data-2203.14688</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-speaker-recognition-systems-with-limited-data-2203.14688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-speaker-recognition-systems-with-limited-data-2203.14688"/></url>
<url><loc>https://scifaro.com/en/abs/vtts-visual-text-to-speech-2203.14725</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vtts-visual-text-to-speech-2203.14725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vtts-visual-text-to-speech-2203.14725"/></url>
<url><loc>https://scifaro.com/en/abs/studies-corpus-of-japanese-empathetic-dialogue-speech-towards-friendly-voice-agent-2203.14757</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/studies-corpus-of-japanese-empathetic-dialogue-speech-towards-friendly-voice-agent-2203.14757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/studies-corpus-of-japanese-empathetic-dialogue-speech-towards-friendly-voice-agent-2203.14757"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-language-independent-speaker-anonymization-framework-under-unseen-conditions-2203.14834</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-language-independent-speaker-anonymization-framework-under-unseen-conditions-2203.14834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-language-independent-speaker-anonymization-framework-under-unseen-conditions-2203.14834"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speaker-recognition-with-transformers-using-wav2vec-2-0-2203.15095</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speaker-recognition-with-transformers-using-wav2vec-2-0-2203.15095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speaker-recognition-with-transformers-using-wav2vec-2-0-2203.15095"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-different-calibration-methods-for-deep-speaker-embedding-based-verification-systems-2203.15106</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-different-calibration-methods-for-deep-speaker-embedding-based-verification-systems-2203.15106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-different-calibration-methods-for-deep-speaker-embedding-based-verification-systems-2203.15106"/></url>
<url><loc>https://scifaro.com/en/abs/improving-source-separation-by-explicitly-modeling-dependencies-between-sources-2203.15140</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-source-separation-by-explicitly-modeling-dependencies-between-sources-2203.15140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-source-separation-by-explicitly-modeling-dependencies-between-sources-2203.15140"/></url>
<url><loc>https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-speech-enhancement-2203.15149</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-speech-enhancement-2203.15149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-speech-enhancement-2203.15149"/></url>
<url><loc>https://scifaro.com/en/abs/shifted-chunk-encoder-for-transformer-based-streaming-end-to-end-asr-2203.15206</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shifted-chunk-encoder-for-transformer-based-streaming-end-to-end-asr-2203.15206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shifted-chunk-encoder-for-transformer-based-streaming-end-to-end-asr-2203.15206"/></url>
<url><loc>https://scifaro.com/en/abs/mfa-conformer-multi-scale-feature-aggregation-conformer-for-automatic-speaker-verification-2203.15249</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfa-conformer-multi-scale-feature-aggregation-conformer-for-automatic-speaker-verification-2203.15249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfa-conformer-multi-scale-feature-aggregation-conformer-for-automatic-speaker-verification-2203.15249"/></url>
<url><loc>https://scifaro.com/en/abs/neuragen-a-low-resource-neural-network-based-approach-for-gender-classification-2203.15253</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuragen-a-low-resource-neural-network-based-approach-for-gender-classification-2203.15253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuragen-a-low-resource-neural-network-based-approach-for-gender-classification-2203.15253"/></url>
<url><loc>https://scifaro.com/en/abs/applying-syntax-unicode-x2013-prosody-mapping-hypothesis-and-prosodic-well-formedness-constraints-to-neural-sequence-to-sequence-speech-synthesis-2203.15276</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-syntax-unicode-x2013-prosody-mapping-hypothesis-and-prosodic-well-formedness-constraints-to-neural-sequence-to-sequence-speech-synthesis-2203.15276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-syntax-unicode-x2013-prosody-mapping-hypothesis-and-prosodic-well-formedness-constraints-to-neural-sequence-to-sequence-speech-synthesis-2203.15276"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-speech-recognition-with-10-minutes-unparalleled-in-domain-data-2203.15321</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-speech-recognition-with-10-minutes-unparalleled-in-domain-data-2203.15321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-speech-recognition-with-10-minutes-unparalleled-in-domain-data-2203.15321"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-with-co-attention-based-multi-level-acoustic-information-2203.15326</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-co-attention-based-multi-level-acoustic-information-2203.15326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-co-attention-based-multi-level-acoustic-information-2203.15326"/></url>
<url><loc>https://scifaro.com/en/abs/iranian-modal-music-dastgah-detection-using-deep-neural-networks-2203.15335</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iranian-modal-music-dastgah-detection-using-deep-neural-networks-2203.15335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iranian-modal-music-dastgah-detection-using-deep-neural-networks-2203.15335"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-aware-speaker-verification-by-multi-level-fusion-2203.15377</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-by-multi-level-fusion-2203.15377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-by-multi-level-fusion-2203.15377"/></url>
<url><loc>https://scifaro.com/en/abs/voiceme-personalized-voice-generation-in-tts-2203.15379</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceme-personalized-voice-generation-in-tts-2203.15379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceme-personalized-voice-generation-in-tts-2203.15379"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-self-supervised-pretraining-frameworks-for-pathological-speech-recognition-2203.15431</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-self-supervised-pretraining-frameworks-for-pathological-speech-recognition-2203.15431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-self-supervised-pretraining-frameworks-for-pathological-speech-recognition-2203.15431"/></url>
<url><loc>https://scifaro.com/en/abs/wenet-2-0-more-productive-end-to-end-speech-recognition-toolkit-2203.15455</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wenet-2-0-more-productive-end-to-end-speech-recognition-toolkit-2203.15455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wenet-2-0-more-productive-end-to-end-speech-recognition-toolkit-2203.15455"/></url>
<url><loc>https://scifaro.com/en/abs/machine-composition-of-korean-music-via-topological-data-analysis-and-artificial-neural-network-2203.15468</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-composition-of-korean-music-via-topological-data-analysis-and-artificial-neural-network-2203.15468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-composition-of-korean-music-via-topological-data-analysis-and-artificial-neural-network-2203.15468"/></url>
<url><loc>https://scifaro.com/en/abs/learning-neural-audio-features-without-supervision-2203.15519</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-neural-audio-features-without-supervision-2203.15519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-neural-audio-features-without-supervision-2203.15519"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-audio-text-representation-for-automated-audio-captioning-with-contrastive-learning-2203.15526</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-audio-text-representation-for-automated-audio-captioning-with-contrastive-learning-2203.15526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-audio-text-representation-for-automated-audio-captioning-with-contrastive-learning-2203.15526"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-for-speech-emotion-recognition-in-greek-theatrical-plays-2203.15568</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-for-speech-emotion-recognition-in-greek-theatrical-plays-2203.15568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-for-speech-emotion-recognition-in-greek-theatrical-plays-2203.15568"/></url>
<url><loc>https://scifaro.com/en/abs/subspace-based-representation-and-learning-for-phonotactic-spoken-language-recognition-2203.15576</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspace-based-representation-and-learning-for-phonotactic-spoken-language-recognition-2203.15576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspace-based-representation-and-learning-for-phonotactic-spoken-language-recognition-2203.15576"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-speech-from-surroundings-with-neural-embeddings-2203.15578</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-speech-from-surroundings-with-neural-embeddings-2203.15578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-speech-from-surroundings-with-neural-embeddings-2203.15578"/></url>
<url><loc>https://scifaro.com/en/abs/locality-matters-a-locality-biased-linear-attention-for-automatic-speech-recognition-2203.15609</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locality-matters-a-locality-biased-linear-attention-for-automatic-speech-recognition-2203.15609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locality-matters-a-locality-biased-linear-attention-for-automatic-speech-recognition-2203.15609"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-latency-for-ctc-based-streaming-automatic-speech-recognition-with-emformer-2203.15613</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-latency-for-ctc-based-streaming-automatic-speech-recognition-with-emformer-2203.15613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-latency-for-ctc-based-streaming-automatic-speech-recognition-with-emformer-2203.15613"/></url>
<url><loc>https://scifaro.com/en/abs/nix-tts-lightweight-and-end-to-end-text-to-speech-via-module-wise-distillation-2203.15643</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nix-tts-lightweight-and-end-to-end-text-to-speech-via-module-wise-distillation-2203.15643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nix-tts-lightweight-and-end-to-end-text-to-speech-via-module-wise-distillation-2203.15643"/></url>
<url><loc>https://scifaro.com/en/abs/drspeech-degradation-robust-text-to-speech-synthesis-with-frame-level-and-utterance-level-acoustic-representation-learning-2203.15683</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drspeech-degradation-robust-text-to-speech-synthesis-with-frame-level-and-utterance-level-acoustic-representation-learning-2203.15683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drspeech-degradation-robust-text-to-speech-synthesis-with-frame-level-and-utterance-level-acoustic-representation-learning-2203.15683"/></url>
<url><loc>https://scifaro.com/en/abs/target-geometry-estimation-using-deep-neural-networks-in-sonar-sensing-2203.15770</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-geometry-estimation-using-deep-neural-networks-in-sonar-sensing-2203.15770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-geometry-estimation-using-deep-neural-networks-in-sonar-sensing-2203.15770"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-analysis-of-sequence-to-sequence-emotional-voice-conversion-2203.15873</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-analysis-of-sequence-to-sequence-emotional-voice-conversion-2203.15873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-analysis-of-sequence-to-sequence-emotional-voice-conversion-2203.15873"/></url>
<url><loc>https://scifaro.com/en/abs/federated-domain-adaptation-for-asr-with-full-self-supervision-2203.15966</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-domain-adaptation-for-asr-with-full-self-supervision-2203.15966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-domain-adaptation-for-asr-with-full-self-supervision-2203.15966"/></url>
<url><loc>https://scifaro.com/en/abs/multi-target-extractor-and-detector-for-unknown-number-speaker-diarization-2203.16007</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-target-extractor-and-detector-for-unknown-number-speaker-diarization-2203.16007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-target-extractor-and-detector-for-unknown-number-speaker-diarization-2203.16007"/></url>
<url><loc>https://scifaro.com/en/abs/conferencingspeech-2022-challenge-non-intrusive-objective-speech-quality-assessment-nisqa-challenge-for-online-conferencing-applications-2203.16032</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conferencingspeech-2022-challenge-non-intrusive-objective-speech-quality-assessment-nisqa-challenge-for-online-conferencing-applications-2203.16032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conferencingspeech-2022-challenge-non-intrusive-objective-speech-quality-assessment-nisqa-challenge-for-online-conferencing-applications-2203.16032"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-shoulder-to-shoulder-a-coordinated-sub-band-fusion-model-for-real-time-full-band-speech-enhancement-2203.16033</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-shoulder-to-shoulder-a-coordinated-sub-band-fusion-model-for-real-time-full-band-speech-enhancement-2203.16033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-shoulder-to-shoulder-a-coordinated-sub-band-fusion-model-for-real-time-full-band-speech-enhancement-2203.16033"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-zero-shot-many-to-many-voice-conversion-with-self-attention-vae-2203.16037</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-zero-shot-many-to-many-voice-conversion-with-self-attention-vae-2203.16037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-zero-shot-many-to-many-voice-conversion-with-self-attention-vae-2203.16037"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-the-impacts-of-language-and-channel-variability-on-speech-separation-networks-2203.16040</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-the-impacts-of-language-and-channel-variability-on-speech-separation-networks-2203.16040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-the-impacts-of-language-and-channel-variability-on-speech-separation-networks-2203.16040"/></url>
<url><loc>https://scifaro.com/en/abs/coarse-to-fine-recursive-speech-separation-for-unknown-number-of-speakers-2203.16054</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coarse-to-fine-recursive-speech-separation-for-unknown-number-of-speakers-2203.16054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coarse-to-fine-recursive-speech-separation-for-unknown-number-of-speakers-2203.16054"/></url>
<url><loc>https://scifaro.com/en/abs/combination-of-time-domain-frequency-domain-and-cepstral-domain-acoustic-features-for-speech-commands-classification-2203.16085</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combination-of-time-domain-frequency-domain-and-cepstral-domain-acoustic-features-for-speech-commands-classification-2203.16085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combination-of-time-domain-frequency-domain-and-cepstral-domain-acoustic-features-for-speech-commands-classification-2203.16085"/></url>
<url><loc>https://scifaro.com/en/abs/improving-distortion-robustness-of-self-supervised-speech-processing-tasks-with-domain-adaptation-2203.16104</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-distortion-robustness-of-self-supervised-speech-processing-tasks-with-domain-adaptation-2203.16104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-distortion-robustness-of-self-supervised-speech-processing-tasks-with-domain-adaptation-2203.16104"/></url>
<url><loc>https://scifaro.com/en/abs/example-based-explanations-with-adversarial-attacks-for-respiratory-sound-analysis-2203.16141</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/example-based-explanations-with-adversarial-attacks-for-respiratory-sound-analysis-2203.16141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/example-based-explanations-with-adversarial-attacks-for-respiratory-sound-analysis-2203.16141"/></url>
<url><loc>https://scifaro.com/en/abs/does-audio-deepfake-detection-generalize-2203.16263</loc><lastmod>2026-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-audio-deepfake-detection-generalize-2203.16263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-audio-deepfake-detection-generalize-2203.16263"/></url>
<url><loc>https://scifaro.com/en/abs/acoustics-specific-piano-velocity-estimation-2203.16294</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustics-specific-piano-velocity-estimation-2203.16294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustics-specific-piano-velocity-estimation-2203.16294"/></url>
<url><loc>https://scifaro.com/en/abs/rainbow-keywords-efficient-incremental-learning-for-online-spoken-keyword-spotting-2203.16361</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rainbow-keywords-efficient-incremental-learning-for-online-spoken-keyword-spotting-2203.16361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rainbow-keywords-efficient-incremental-learning-for-online-spoken-keyword-spotting-2203.16361"/></url>
<url><loc>https://scifaro.com/en/abs/learn2sing-2-0-diffusion-and-mutual-information-based-target-speaker-svs-by-learning-from-singing-teacher-2203.16408</loc><lastmod>2022-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn2sing-2-0-diffusion-and-mutual-information-based-target-speaker-svs-by-learning-from-singing-teacher-2203.16408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn2sing-2-0-diffusion-and-mutual-information-based-target-speaker-svs-by-learning-from-singing-teacher-2203.16408"/></url>
<url><loc>https://scifaro.com/en/abs/forensic-analysis-and-localization-of-multiply-compressed-mp3-audio-using-transformers-2203.16499</loc><lastmod>2022-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/forensic-analysis-and-localization-of-multiply-compressed-mp3-audio-using-transformers-2203.16499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/forensic-analysis-and-localization-of-multiply-compressed-mp3-audio-using-transformers-2203.16499"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-handcrafted-and-learnable-audio-representation-for-analysis-of-speech-under-cognitive-and-physical-load-2203.16637</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-handcrafted-and-learnable-audio-representation-for-analysis-of-speech-under-cognitive-and-physical-load-2203.16637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-handcrafted-and-learnable-audio-representation-for-analysis-of-speech-under-cognitive-and-physical-load-2203.16637"/></url>
<url><loc>https://scifaro.com/en/abs/generation-of-speaker-representations-using-heterogeneous-training-batch-assembly-2203.16646</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generation-of-speaker-representations-using-heterogeneous-training-batch-assembly-2203.16646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generation-of-speaker-representations-using-heterogeneous-training-batch-assembly-2203.16646"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-de-identification-with-functional-data-analysis-of-f0-trajectories-2203.16738</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-de-identification-with-functional-data-analysis-of-f0-trajectories-2203.16738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-de-identification-with-functional-data-analysis-of-f0-trajectories-2203.16738"/></url>
<url><loc>https://scifaro.com/en/abs/effective-data-screening-technique-for-crowdsourced-speech-intelligibility-experiments-evaluation-with-irm-based-speech-enhancement-2203.16760</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-data-screening-technique-for-crowdsourced-speech-intelligibility-experiments-evaluation-with-irm-based-speech-enhancement-2203.16760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-data-screening-technique-for-crowdsourced-speech-intelligibility-experiments-evaluation-with-irm-based-speech-enhancement-2203.16760"/></url>
<url><loc>https://scifaro.com/en/abs/learning-decoupling-features-through-orthogonality-regularization-2203.16772</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-decoupling-features-through-orthogonality-regularization-2203.16772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-decoupling-features-through-orthogonality-regularization-2203.16772"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2203.16834</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2203.16834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2203.16834"/></url>
<url><loc>https://scifaro.com/en/abs/neufa-neural-network-based-end-to-end-forced-alignment-with-bidirectional-attention-mechanism-2203.16838</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neufa-neural-network-based-end-to-end-forced-alignment-with-bidirectional-attention-mechanism-2203.16838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neufa-neural-network-based-end-to-end-forced-alignment-with-bidirectional-attention-mechanism-2203.16838"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-for-speech-emotion-recognition-2203.16928</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-for-speech-emotion-recognition-2203.16928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-for-speech-emotion-recognition-2203.16928"/></url>
<url><loc>https://scifaro.com/en/abs/wavthruvec-latent-speech-representation-as-intermediate-features-for-neural-speech-synthesis-2203.16930</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavthruvec-latent-speech-representation-as-intermediate-features-for-neural-speech-synthesis-2203.16930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavthruvec-latent-speech-representation-as-intermediate-features-for-neural-speech-synthesis-2203.16930"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-vc-high-quality-asr-based-voice-conversion-2203.16937</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-vc-high-quality-asr-based-voice-conversion-2203.16937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-vc-high-quality-asr-based-voice-conversion-2203.16937"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-between-linear-and-nonlinear-speech-prediction-2203.16962</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-between-linear-and-nonlinear-speech-prediction-2203.16962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-between-linear-and-nonlinear-speech-prediction-2203.16962"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-fusion-methods-for-sasv-challenge-2022-2203.16970</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-fusion-methods-for-sasv-challenge-2022-2203.16970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-fusion-methods-for-sasv-challenge-2022-2203.16970"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-net-a-novel-neural-network-for-sound-localization-and-quantification-2203.16988</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-net-a-novel-neural-network-for-sound-localization-and-quantification-2203.16988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-net-a-novel-neural-network-for-sound-localization-and-quantification-2203.16988"/></url>
<url><loc>https://scifaro.com/en/abs/a-temporal-oriented-broadcast-resnet-for-covid-19-detection-2203.17012</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-temporal-oriented-broadcast-resnet-for-covid-19-detection-2203.17012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-temporal-oriented-broadcast-resnet-for-covid-19-detection-2203.17012"/></url>
<url><loc>https://scifaro.com/en/abs/cta-rnn-channel-and-temporal-wise-attention-rnn-leveraging-pre-trained-asr-embeddings-for-speech-emotion-recognition-2203.17023</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cta-rnn-channel-and-temporal-wise-attention-rnn-leveraging-pre-trained-asr-embeddings-for-speech-emotion-recognition-2203.17023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cta-rnn-channel-and-temporal-wise-attention-rnn-leveraging-pre-trained-asr-embeddings-for-speech-emotion-recognition-2203.17023"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-speaker-distillation-for-countermeasure-model-on-automatic-speaker-verification-2203.17031</loc><lastmod>2025-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-speaker-distillation-for-countermeasure-model-on-automatic-speaker-verification-2203.17031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-speaker-distillation-for-countermeasure-model-on-automatic-speaker-verification-2203.17031"/></url>
<url><loc>https://scifaro.com/en/abs/manipulation-of-oral-cancer-speech-using-neural-articulatory-synthesis-2203.17072</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manipulation-of-oral-cancer-speech-using-neural-articulatory-synthesis-2203.17072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manipulation-of-oral-cancer-speech-using-neural-articulatory-synthesis-2203.17072"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-environmental-noise-on-alzheimer-s-disease-detection-from-speech-should-you-let-a-baby-cry-2203.17110</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-environmental-noise-on-alzheimer-s-disease-detection-from-speech-should-you-let-a-baby-cry-2203.17110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-environmental-noise-on-alzheimer-s-disease-detection-from-speech-should-you-let-a-baby-cry-2203.17110"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-transformer-decoder-for-end-to-end-asr-model-with-unpaired-speech-data-2203.17113</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-transformer-decoder-for-end-to-end-asr-model-with-unpaired-speech-data-2203.17113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-transformer-decoder-for-end-to-end-asr-model-with-unpaired-speech-data-2203.17113"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-contrast-stretching-on-target-feature-for-speech-enhancement-2203.17152</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-contrast-stretching-on-target-feature-for-speech-enhancement-2203.17152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-contrast-stretching-on-target-feature-for-speech-enhancement-2203.17152"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-expressed-emotion-from-five-minute-speech-samples-challenges-and-opportunities-2203.17242</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-expressed-emotion-from-five-minute-speech-samples-challenges-and-opportunities-2203.17242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-expressed-emotion-from-five-minute-speech-samples-challenges-and-opportunities-2203.17242"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmented-cross-lingual-synthesis-in-a-teacher-student-framework-2204.00061</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmented-cross-lingual-synthesis-in-a-teacher-student-framework-2204.00061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmented-cross-lingual-synthesis-in-a-teacher-student-framework-2204.00061"/></url>
<url><loc>https://scifaro.com/en/abs/speech-and-the-n-back-task-as-a-lens-into-depression-how-combining-both-may-allow-us-to-isolate-different-core-symptoms-of-depression-2204.00088</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-and-the-n-back-task-as-a-lens-into-depression-how-combining-both-may-allow-us-to-isolate-different-core-symptoms-of-depression-2204.00088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-and-the-n-back-task-as-a-lens-into-depression-how-combining-both-may-allow-us-to-isolate-different-core-symptoms-of-depression-2204.00088"/></url>
<url><loc>https://scifaro.com/en/abs/perceptive-non-linear-speech-processing-and-spiking-neural-networks-2204.00094</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptive-non-linear-speech-processing-and-spiking-neural-networks-2204.00094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptive-non-linear-speech-processing-and-spiking-neural-networks-2204.00094"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-hybrid-speech-coding-with-a-mlp-lpc-structure-2204.00245</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-hybrid-speech-coding-with-a-mlp-lpc-structure-2204.00245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-hybrid-speech-coding-with-a-mlp-lpc-structure-2204.00245"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-in-mismatch-training-and-testing-conditions-2204.00311</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-in-mismatch-training-and-testing-conditions-2204.00311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-in-mismatch-training-and-testing-conditions-2204.00311"/></url>
<url><loc>https://scifaro.com/en/abs/using-segment-based-features-of-jaw-movements-to-recognize-foraging-activities-in-grazing-cattle-2204.00331</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-segment-based-features-of-jaw-movements-to-recognize-foraging-activities-in-grazing-cattle-2204.00331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-segment-based-features-of-jaw-movements-to-recognize-foraging-activities-in-grazing-cattle-2204.00331"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-speech-enhancement-and-self-supervised-learning-representation-2204.00540</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-speech-enhancement-and-self-supervised-learning-representation-2204.00540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-speech-enhancement-and-self-supervised-learning-representation-2204.00540"/></url>
<url><loc>https://scifaro.com/en/abs/learning-neural-acoustic-fields-2204.00628</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-neural-acoustic-fields-2204.00628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-neural-acoustic-fields-2204.00628"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-talker-audio-visual-asr-using-an-active-speaker-attention-module-2204.00652</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-talker-audio-visual-asr-using-an-active-speaker-attention-module-2204.00652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-talker-audio-visual-asr-using-an-active-speaker-attention-module-2204.00652"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptation-for-wav2vec2-based-dysarthric-asr-2204.00770</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptation-for-wav2vec2-based-dysarthric-asr-2204.00770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptation-for-wav2vec2-based-dysarthric-asr-2204.00770"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-phone-mask-training-for-phonetic-reduction-robust-e2e-uyghur-speech-recognition-2204.00819</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-phone-mask-training-for-phonetic-reduction-robust-e2e-uyghur-speech-recognition-2204.00819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-phone-mask-training-for-phonetic-reduction-robust-e2e-uyghur-speech-recognition-2204.00819"/></url>
<url><loc>https://scifaro.com/en/abs/improving-target-sound-extraction-with-timestamp-information-2204.00821</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-target-sound-extraction-with-timestamp-information-2204.00821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-target-sound-extraction-with-timestamp-information-2204.00821"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-based-on-speech-decomposition-and-auxiliary-feature-2204.00873</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-based-on-speech-decomposition-and-auxiliary-feature-2204.00873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-based-on-speech-decomposition-and-auxiliary-feature-2204.00873"/></url>
<url><loc>https://scifaro.com/en/abs/an-objective-test-tool-for-pitch-extractors-response-attributes-2204.00902</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-objective-test-tool-for-pitch-extractors-response-attributes-2204.00902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-objective-test-tool-for-pitch-extractors-response-attributes-2204.00902"/></url>
<url><loc>https://scifaro.com/en/abs/stylewavegan-style-based-synthesis-of-drum-sounds-with-extensive-controls-using-generative-adversarial-networks-2204.00907</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylewavegan-style-based-synthesis-of-drum-sounds-with-extensive-controls-using-generative-adversarial-networks-2204.00907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylewavegan-style-based-synthesis-of-drum-sounds-with-extensive-controls-using-generative-adversarial-networks-2204.00907"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-pitch-extractors-response-to-frequency-modulated-multi-component-signals-2204.00911</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-pitch-extractors-response-to-frequency-modulated-multi-component-signals-2204.00911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-pitch-extractors-response-to-frequency-modulated-multi-component-signals-2204.00911"/></url>
<url><loc>https://scifaro.com/en/abs/content-dependent-fine-grained-speaker-embedding-for-zero-shot-speaker-adaptation-in-text-to-speech-synthesis-2204.00990</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-dependent-fine-grained-speaker-embedding-for-zero-shot-speaker-adaptation-in-text-to-speech-synthesis-2204.00990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-dependent-fine-grained-speaker-embedding-for-zero-shot-speaker-adaptation-in-text-to-speech-synthesis-2204.00990"/></url>
<url><loc>https://scifaro.com/en/abs/a-computational-analysis-of-pitch-drift-in-unaccompanied-solo-singing-using-dbscan-clustering-2204.01009</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computational-analysis-of-pitch-drift-in-unaccompanied-solo-singing-using-dbscan-clustering-2204.01009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computational-analysis-of-pitch-drift-in-unaccompanied-solo-singing-using-dbscan-clustering-2204.01009"/></url>
<url><loc>https://scifaro.com/en/abs/on-incorporating-social-speaker-characteristics-in-synthetic-speech-2204.01115</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-incorporating-social-speaker-characteristics-in-synthetic-speech-2204.01115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-incorporating-social-speaker-characteristics-in-synthetic-speech-2204.01115"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-model-size-selection-for-speaker-identification-2204.01294</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-model-size-selection-for-speaker-identification-2204.01294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-model-size-selection-for-speaker-identification-2204.01294"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-vectorial-prediction-with-neural-nets-2204.01295</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-vectorial-prediction-with-neural-nets-2204.01295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-vectorial-prediction-with-neural-nets-2204.01295"/></url>
<url><loc>https://scifaro.com/en/abs/an-initialization-scheme-for-meeting-separation-with-spatial-mixture-models-2204.01338</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initialization-scheme-for-meeting-separation-with-spatial-mixture-models-2204.01338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initialization-scheme-for-meeting-separation-with-spatial-mixture-models-2204.01338"/></url>
<url><loc>https://scifaro.com/en/abs/learning-the-proximity-operator-in-unfolded-admm-for-phase-retrieval-2204.01360</loc><lastmod>2022-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-the-proximity-operator-in-unfolded-admm-for-phase-retrieval-2204.01360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-the-proximity-operator-in-unfolded-admm-for-phase-retrieval-2204.01360"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-ecapa-tdnn-and-wav2vec2-0-embeddings-to-stuttering-detection-2204.01564</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-ecapa-tdnn-and-wav2vec2-0-embeddings-to-stuttering-detection-2204.01564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-ecapa-tdnn-and-wav2vec2-0-embeddings-to-stuttering-detection-2204.01564"/></url>
<url><loc>https://scifaro.com/en/abs/residual-guided-personalized-speech-synthesis-based-on-face-image-2204.01672</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-guided-personalized-speech-synthesis-based-on-face-image-2204.01672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-guided-personalized-speech-synthesis-based-on-face-image-2204.01672"/></url>
<url><loc>https://scifaro.com/en/abs/gwa-a-large-high-quality-acoustic-dataset-for-audio-processing-2204.01787</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gwa-a-large-high-quality-acoustic-dataset-for-audio-processing-2204.01787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gwa-a-large-high-quality-acoustic-dataset-for-audio-processing-2204.01787"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-adapt-to-domain-shifts-with-few-shot-samples-in-anomalous-sound-detection-2204.01905</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-adapt-to-domain-shifts-with-few-shot-samples-in-anomalous-sound-detection-2204.01905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-adapt-to-domain-shifts-with-few-shot-samples-in-anomalous-sound-detection-2204.01905"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-multi-channel-speech-separation-dereverberation-and-recognition-2204.01977</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-multi-channel-speech-separation-dereverberation-and-recognition-2204.01977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-multi-channel-speech-separation-dereverberation-and-recognition-2204.01977"/></url>
<url><loc>https://scifaro.com/en/abs/a-complementary-joint-training-approach-using-unpaired-speech-and-text-for-low-resource-automatic-speech-recognition-2204.02023</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-complementary-joint-training-approach-using-unpaired-speech-and-text-for-low-resource-automatic-speech-recognition-2204.02023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-complementary-joint-training-approach-using-unpaired-speech-and-text-for-low-resource-automatic-speech-recognition-2204.02023"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-verification-2204.02040</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-verification-2204.02040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relevance-of-bandwidth-extension-for-speaker-verification-2204.02040"/></url>
<url><loc>https://scifaro.com/en/abs/a-mixed-supervised-learning-framework-for-target-sound-detection-2204.02088</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mixed-supervised-learning-framework-for-target-sound-detection-2204.02088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mixed-supervised-learning-framework-for-target-sound-detection-2204.02088"/></url>
<url><loc>https://scifaro.com/en/abs/non-linear-speech-coding-with-mlp-rbf-and-elman-based-prediction-2204.02101</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-linear-speech-coding-with-mlp-rbf-and-elman-based-prediction-2204.02101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-linear-speech-coding-with-mlp-rbf-and-elman-based-prediction-2204.02101"/></url>
<url><loc>https://scifaro.com/en/abs/metaaudio-a-few-shot-audio-classification-benchmark-2204.02121</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metaaudio-a-few-shot-audio-classification-benchmark-2204.02121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metaaudio-a-few-shot-audio-classification-benchmark-2204.02121"/></url>
<url><loc>https://scifaro.com/en/abs/radur-a-reference-aware-and-duration-robust-network-for-target-sound-detection-2204.02143</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radur-a-reference-aware-and-duration-robust-network-for-target-sound-detection-2204.02143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radur-a-reference-aware-and-duration-robust-network-for-target-sound-detection-2204.02143"/></url>
<url><loc>https://scifaro.com/en/abs/utmos-utokyo-sarulab-system-for-voicemos-challenge-2022-2204.02152</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utmos-utokyo-sarulab-system-for-voicemos-challenge-2022-2204.02152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utmos-utokyo-sarulab-system-for-voicemos-challenge-2022-2204.02152"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-learning-of-intermediate-acoustic-feature-for-end-to-end-lightweight-text-to-speech-2204.02172</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-learning-of-intermediate-acoustic-feature-for-end-to-end-lightweight-text-to-speech-2204.02172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-learning-of-intermediate-acoustic-feature-for-end-to-end-lightweight-text-to-speech-2204.02172"/></url>
<url><loc>https://scifaro.com/en/abs/repeat-after-me-self-supervised-learning-of-acoustic-to-articulatory-mapping-by-vocal-imitation-2204.02269</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repeat-after-me-self-supervised-learning-of-acoustic-to-articulatory-mapping-by-vocal-imitation-2204.02269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repeat-after-me-self-supervised-learning-of-acoustic-to-articulatory-mapping-by-vocal-imitation-2204.02269"/></url>
<url><loc>https://scifaro.com/en/abs/how-information-on-acoustic-scenes-and-sound-events-mutually-benefits-event-detection-and-scene-classification-tasks-2204.02279</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-information-on-acoustic-scenes-and-sound-events-mutually-benefits-event-detection-and-scene-classification-tasks-2204.02279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-information-on-acoustic-scenes-and-sound-events-mutually-benefits-event-detection-and-scene-classification-tasks-2204.02279"/></url>
<url><loc>https://scifaro.com/en/abs/what-can-predictive-speech-coders-learn-from-speaker-recognizers-2204.02400</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-can-predictive-speech-coders-learn-from-speaker-recognizers-2204.02400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-can-predictive-speech-coders-learn-from-speaker-recognizers-2204.02400"/></url>
<url><loc>https://scifaro.com/en/abs/improving-voice-trigger-detection-with-metric-learning-2204.02455</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-voice-trigger-detection-with-metric-learning-2204.02455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-voice-trigger-detection-with-metric-learning-2204.02455"/></url>
<url><loc>https://scifaro.com/en/abs/simple-and-effective-unsupervised-speech-synthesis-2204.02524</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simple-and-effective-unsupervised-speech-synthesis-2204.02524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simple-and-effective-unsupervised-speech-synthesis-2204.02524"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-nonlinear-speaker-parameterization-algorithm-for-speaker-identification-2204.02609</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-nonlinear-speaker-parameterization-algorithm-for-speaker-identification-2204.02609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-nonlinear-speaker-parameterization-algorithm-for-speaker-identification-2204.02609"/></url>
<url><loc>https://scifaro.com/en/abs/towards-multi-scale-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2204.02743</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-multi-scale-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2204.02743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-multi-scale-speaking-style-modelling-with-hierarchical-context-information-for-mandarin-speech-synthesis-2204.02743"/></url>
<url><loc>https://scifaro.com/en/abs/federated-self-supervised-speech-representations-are-we-there-yet-2204.02804</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-self-supervised-speech-representations-are-we-there-yet-2204.02804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-self-supervised-speech-representations-are-we-there-yet-2204.02804"/></url>
<url><loc>https://scifaro.com/en/abs/aggression-in-hindi-and-english-speech-acoustic-correlates-and-automatic-identification-2204.02814</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aggression-in-hindi-and-english-speech-acoustic-correlates-and-automatic-identification-2204.02814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aggression-in-hindi-and-english-speech-acoustic-correlates-and-automatic-identification-2204.02814"/></url>
<url><loc>https://scifaro.com/en/abs/somos-the-samsung-open-mos-dataset-for-the-evaluation-of-neural-text-to-speech-synthesis-2204.03040</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/somos-the-samsung-open-mos-dataset-for-the-evaluation-of-neural-text-to-speech-synthesis-2204.03040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/somos-the-samsung-open-mos-dataset-for-the-evaluation-of-neural-text-to-speech-synthesis-2204.03040"/></url>
<url><loc>https://scifaro.com/en/abs/ffc-se-fast-fourier-convolution-for-speech-enhancement-2204.03042</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ffc-se-fast-fourier-convolution-for-speech-enhancement-2204.03042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ffc-se-fast-fourier-convolution-for-speech-enhancement-2204.03042"/></url>
<url><loc>https://scifaro.com/en/abs/3m-multi-loss-multi-path-and-multi-level-neural-networks-for-speech-recognition-2204.03178</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3m-multi-loss-multi-path-and-multi-level-neural-networks-for-speech-recognition-2204.03178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3m-multi-loss-multi-path-and-multi-level-neural-networks-for-speech-recognition-2204.03178"/></url>
<url><loc>https://scifaro.com/en/abs/speech-pre-training-with-acoustic-piece-2204.03240</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-pre-training-with-acoustic-piece-2204.03240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-pre-training-with-acoustic-piece-2204.03240"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-singing-synthesis-using-local-style-token-and-dual-path-pitch-encoder-2204.03249</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-singing-synthesis-using-local-style-token-and-dual-path-pitch-encoder-2204.03249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-singing-synthesis-using-local-style-token-and-dual-path-pitch-encoder-2204.03249"/></url>
<url><loc>https://scifaro.com/en/abs/arabic-text-to-speech-tts-data-preparation-2204.03255</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arabic-text-to-speech-tts-data-preparation-2204.03255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arabic-text-to-speech-tts-data-preparation-2204.03255"/></url>
<url><loc>https://scifaro.com/en/abs/genre-conditioned-acoustic-models-for-automatic-lyrics-transcription-of-polyphonic-music-2204.03307</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/genre-conditioned-acoustic-models-for-automatic-lyrics-transcription-of-polyphonic-music-2204.03307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/genre-conditioned-acoustic-models-for-automatic-lyrics-transcription-of-polyphonic-music-2204.03307"/></url>
<url><loc>https://scifaro.com/en/abs/linguistic-acoustic-similarity-based-accent-shift-for-accent-recognition-2204.03398</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linguistic-acoustic-similarity-based-accent-shift-for-accent-recognition-2204.03398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linguistic-acoustic-similarity-based-accent-shift-for-accent-recognition-2204.03398"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-robust-voice-cloning-2204.03421</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-robust-voice-cloning-2204.03421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-robust-voice-cloning-2204.03421"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-target-speech-separation-2204.03594</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-target-speech-separation-2204.03594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-target-speech-separation-2204.03594"/></url>
<url><loc>https://scifaro.com/en/abs/successes-and-critical-failures-of-neural-networks-in-capturing-human-like-speech-recognition-2204.03740</loc><lastmod>2023-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/successes-and-critical-failures-of-neural-networks-in-capturing-human-like-speech-recognition-2204.03740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/successes-and-critical-failures-of-neural-networks-in-capturing-human-like-speech-recognition-2204.03740"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-exemplar-autoencoder-with-cycle-consistency-loss-in-any-to-one-voice-conversion-2204.03847</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-exemplar-autoencoder-with-cycle-consistency-loss-in-any-to-one-voice-conversion-2204.03847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-exemplar-autoencoder-with-cycle-consistency-loss-in-any-to-one-voice-conversion-2204.03847"/></url>
<url><loc>https://scifaro.com/en/abs/reliable-visualization-for-deep-speaker-recognition-2204.03852</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reliable-visualization-for-deep-speaker-recognition-2204.03852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reliable-visualization-for-deep-speaker-recognition-2204.03852"/></url>
<url><loc>https://scifaro.com/en/abs/adding-connectionist-temporal-summarization-into-conformer-to-improve-its-decoder-efficiency-for-speech-recognition-2204.03889</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adding-connectionist-temporal-summarization-into-conformer-to-improve-its-decoder-efficiency-for-speech-recognition-2204.03889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adding-connectionist-temporal-summarization-into-conformer-to-improve-its-decoder-efficiency-for-speech-recognition-2204.03889"/></url>
<url><loc>https://scifaro.com/en/abs/the-sillwood-technologies-system-for-the-voicemos-challenge-2022-2204.03967</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sillwood-technologies-system-for-the-voicemos-challenge-2022-2204.03967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sillwood-technologies-system-for-the-voicemos-challenge-2022-2204.03967"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speaker-diarization-2204.04166</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speaker-diarization-2204.04166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speaker-diarization-2204.04166"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-speech-separation-with-narrow-band-conformer-2204.04464</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-speech-separation-with-narrow-band-conformer-2204.04464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-speech-separation-with-narrow-band-conformer-2204.04464"/></url>
<url><loc>https://scifaro.com/en/abs/inferring-pitch-from-coarse-spectral-features-2204.04579</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inferring-pitch-from-coarse-spectral-features-2204.04579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inferring-pitch-from-coarse-spectral-features-2204.04579"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-audio-and-text-pre-training-with-extremely-low-resource-parallel-data-2204.04645</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-audio-and-text-pre-training-with-extremely-low-resource-parallel-data-2204.04645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-audio-and-text-pre-training-with-extremely-low-resource-parallel-data-2204.04645"/></url>
<url><loc>https://scifaro.com/en/abs/deep-embeddings-for-robust-user-based-amateur-vocal-percussion-classification-2204.04646</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-embeddings-for-robust-user-based-amateur-vocal-percussion-classification-2204.04646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-embeddings-for-robust-user-based-amateur-vocal-percussion-classification-2204.04646"/></url>
<url><loc>https://scifaro.com/en/abs/deep-conditional-representation-learning-for-drum-sample-retrieval-by-vocalisation-2204.04651</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-conditional-representation-learning-for-drum-sample-retrieval-by-vocalisation-2204.04651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-conditional-representation-learning-for-drum-sample-retrieval-by-vocalisation-2204.04651"/></url>
<url><loc>https://scifaro.com/en/abs/towards-evaluation-of-autonomously-generated-musical-compositions-a-comprehensive-survey-2204.04756</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-evaluation-of-autonomously-generated-musical-compositions-a-comprehensive-survey-2204.04756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-evaluation-of-autonomously-generated-musical-compositions-a-comprehensive-survey-2204.04756"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-pragmatism-of-using-binary-classifiers-over-data-intensive-neural-network-classifiers-for-detection-of-covid-19-from-voice-2204.04802</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-pragmatism-of-using-binary-classifiers-over-data-intensive-neural-network-classifiers-for-detection-of-covid-19-from-voice-2204.04802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-pragmatism-of-using-binary-classifiers-over-data-intensive-neural-network-classifiers-for-detection-of-covid-19-from-voice-2204.04802"/></url>
<url><loc>https://scifaro.com/en/abs/fusion-of-self-supervised-learned-models-for-mos-prediction-2204.04855</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusion-of-self-supervised-learned-models-for-mos-prediction-2204.04855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusion-of-self-supervised-learned-models-for-mos-prediction-2204.04855"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-noise-control-for-multispeaker-speech-synthesis-2204.05070</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-noise-control-for-multispeaker-speech-synthesis-2204.05070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-noise-control-for-multispeaker-speech-synthesis-2204.05070"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-to-improving-sound-based-vehicle-speed-estimation-2204.05082</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-to-improving-sound-based-vehicle-speed-estimation-2204.05082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-to-improving-sound-based-vehicle-speed-estimation-2204.05082"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-listen-rethinking-visual-sound-localization-2204.05156</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-listen-rethinking-visual-sound-localization-2204.05156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-listen-rethinking-visual-sound-localization-2204.05156"/></url>
<url><loc>https://scifaro.com/en/abs/interspeech-2022-audio-deep-packet-loss-concealment-challenge-2204.05222</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interspeech-2022-audio-deep-packet-loss-concealment-challenge-2204.05222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interspeech-2022-audio-deep-packet-loss-concealment-challenge-2204.05222"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-multi-channel-convmixer-for-keyword-spotting-with-centroid-based-awareness-2204.05445</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-multi-channel-convmixer-for-keyword-spotting-with-centroid-based-awareness-2204.05445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-multi-channel-convmixer-for-keyword-spotting-with-centroid-based-awareness-2204.05445"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-with-global-aware-fusion-on-multi-scale-feature-representation-2204.05571</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-global-aware-fusion-on-multi-scale-feature-representation-2204.05571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-global-aware-fusion-on-multi-scale-feature-representation-2204.05571"/></url>
<url><loc>https://scifaro.com/en/abs/adff-attention-based-deep-feature-fusion-approach-for-music-emotion-recognition-2204.05649</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adff-attention-based-deep-feature-fusion-approach-for-music-emotion-recognition-2204.05649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adff-attention-based-deep-feature-fusion-approach-for-music-emotion-recognition-2204.05649"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-triage-detecting-sound-events-considering-priority-of-classes-2204.06402</loc><lastmod>2023-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-triage-detecting-sound-events-considering-priority-of-classes-2204.06402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-triage-detecting-sound-events-considering-priority-of-classes-2204.06402"/></url>
<url><loc>https://scifaro.com/en/abs/receptive-field-analysis-of-temporal-convolutional-networks-for-monaural-speech-dereverberation-2204.06439</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/receptive-field-analysis-of-temporal-convolutional-networks-for-monaural-speech-dereverberation-2204.06439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/receptive-field-analysis-of-temporal-convolutional-networks-for-monaural-speech-dereverberation-2204.06439"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-speech-pathology-on-automatic-speaker-verification-a-large-scale-study-2204.06450</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-speech-pathology-on-automatic-speaker-verification-a-large-scale-study-2204.06450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-speech-pathology-on-automatic-speaker-verification-a-large-scale-study-2204.06450"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-score-distribution-to-improve-non-intrusive-speech-quality-estimation-2204.06616</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-score-distribution-to-improve-non-intrusive-speech-quality-estimation-2204.06616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-score-distribution-to-improve-non-intrusive-speech-quality-estimation-2204.06616"/></url>
<url><loc>https://scifaro.com/en/abs/from-environmental-sound-representation-to-robustness-of-2d-cnn-models-against-adversarial-attacks-2204.07018</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-environmental-sound-representation-to-robustness-of-2d-cnn-models-against-adversarial-attacks-2204.07018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-environmental-sound-representation-to-robustness-of-2d-cnn-models-against-adversarial-attacks-2204.07018"/></url>
<url><loc>https://scifaro.com/en/abs/streamable-neural-audio-synthesis-with-non-causal-convolutions-2204.07064</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamable-neural-audio-synthesis-with-non-causal-convolutions-2204.07064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamable-neural-audio-synthesis-with-non-causal-convolutions-2204.07064"/></url>
<url><loc>https://scifaro.com/en/abs/learning-and-controlling-the-source-filter-representation-of-speech-with-a-variational-autoencoder-2204.07075</loc><lastmod>2023-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-and-controlling-the-source-filter-representation-of-speech-with-a-variational-autoencoder-2204.07075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-and-controlling-the-source-filter-representation-of-speech-with-a-variational-autoencoder-2204.07075"/></url>
<url><loc>https://scifaro.com/en/abs/deep-cardiosound-an-ensembled-deep-learning-model-for-heart-sound-multilabelling-2204.07420</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-cardiosound-an-ensembled-deep-learning-model-for-heart-sound-multilabelling-2204.07420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-cardiosound-an-ensembled-deep-learning-model-for-heart-sound-multilabelling-2204.07420"/></url>
<url><loc>https://scifaro.com/en/abs/improving-frame-online-neural-speech-enhancement-with-overlapped-frame-prediction-2204.07566</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-frame-online-neural-speech-enhancement-with-overlapped-frame-prediction-2204.07566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-frame-online-neural-speech-enhancement-with-overlapped-frame-prediction-2204.07566"/></url>
<url><loc>https://scifaro.com/en/abs/ufrc-a-unified-framework-for-reliable-covid-19-detection-on-crowdsourced-cough-audio-2204.07763</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ufrc-a-unified-framework-for-reliable-covid-19-detection-on-crowdsourced-cough-audio-2204.07763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ufrc-a-unified-framework-for-reliable-covid-19-detection-on-crowdsourced-cough-audio-2204.07763"/></url>
<url><loc>https://scifaro.com/en/abs/advances-in-thunder-sound-synthesis-2204.08026</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advances-in-thunder-sound-synthesis-2204.08026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advances-in-thunder-sound-synthesis-2204.08026"/></url>
<url><loc>https://scifaro.com/en/abs/robust-end-to-end-speaker-diarization-with-generic-neural-clustering-2204.08164</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-end-to-end-speaker-diarization-with-generic-neural-clustering-2204.08164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-end-to-end-speaker-diarization-with-generic-neural-clustering-2204.08164"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-time-frequency-scattering-on-gpu-2204.08269</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-time-frequency-scattering-on-gpu-2204.08269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-time-frequency-scattering-on-gpu-2204.08269"/></url>
<url><loc>https://scifaro.com/en/abs/extracting-targeted-training-data-from-asr-models-and-how-to-mitigate-it-2204.08345</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extracting-targeted-training-data-from-asr-models-and-how-to-mitigate-it-2204.08345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extracting-targeted-training-data-from-asr-models-and-how-to-mitigate-it-2204.08345"/></url>
<url><loc>https://scifaro.com/en/abs/caption-feature-space-regularization-for-audio-captioning-2204.08409</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/caption-feature-space-regularization-for-audio-captioning-2204.08409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/caption-feature-space-regularization-for-audio-captioning-2204.08409"/></url>
<url><loc>https://scifaro.com/en/abs/ab-ba-analysis-a-framework-for-estimating-keyword-spotting-recall-improvement-while-maintaining-audio-privacy-2204.08474</loc><lastmod>2022-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ab-ba-analysis-a-framework-for-estimating-keyword-spotting-recall-improvement-while-maintaining-audio-privacy-2204.08474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ab-ba-analysis-a-framework-for-estimating-keyword-spotting-recall-improvement-while-maintaining-audio-privacy-2204.08474"/></url>
<url><loc>https://scifaro.com/en/abs/automated-audio-captioning-using-audio-event-clues-2204.08567</loc><lastmod>2022-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-audio-captioning-using-audio-event-clues-2204.08567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-audio-captioning-using-audio-event-clues-2204.08567"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-adversarial-domain-adaptation-for-cross-corpus-and-cross-language-speech-emotion-recognition-2204.08625</loc><lastmod>2022-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-adversarial-domain-adaptation-for-cross-corpus-and-cross-language-speech-emotion-recognition-2204.08625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-adversarial-domain-adaptation-for-cross-corpus-and-cross-language-speech-emotion-recognition-2204.08625"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-wake-word-spotting-system-for-misp-challenge-2021-2204.08686</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-wake-word-spotting-system-for-misp-challenge-2021-2204.08686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-wake-word-spotting-system-for-misp-challenge-2021-2204.08686"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-attentional-neural-framework-for-structure-aware-performance-score-synchronization-2204.08822</loc><lastmod>2022-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-attentional-neural-framework-for-structure-aware-performance-score-synchronization-2204.08822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-attentional-neural-framework-for-structure-aware-performance-score-synchronization-2204.08822"/></url>
<url><loc>https://scifaro.com/en/abs/disappeared-command-spoofing-attack-on-automatic-speech-recognition-systems-with-sound-masking-2204.08977</loc><lastmod>2022-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disappeared-command-spoofing-attack-on-automatic-speech-recognition-systems-with-sound-masking-2204.08977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disappeared-command-spoofing-attack-on-automatic-speech-recognition-systems-with-sound-masking-2204.08977"/></url>
<url><loc>https://scifaro.com/en/abs/contentvec-an-improved-self-supervised-speech-representation-by-disentangling-speakers-2204.09224</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contentvec-an-improved-self-supervised-speech-representation-by-disentangling-speakers-2204.09224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contentvec-an-improved-self-supervised-speech-representation-by-disentangling-speakers-2204.09224"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-strategies-for-articulatory-synthesis-of-complex-syllable-onsets-2204.09381</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-strategies-for-articulatory-synthesis-of-complex-syllable-onsets-2204.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-strategies-for-articulatory-synthesis-of-complex-syllable-onsets-2204.09381"/></url>
<url><loc>https://scifaro.com/en/abs/clotho-aqa-a-crowdsourced-dataset-for-audio-question-answering-2204.09634</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clotho-aqa-a-crowdsourced-dataset-for-audio-question-answering-2204.09634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clotho-aqa-a-crowdsourced-dataset-for-audio-question-answering-2204.09634"/></url>
<url><loc>https://scifaro.com/en/abs/layer-wise-fast-adaptation-for-end-to-end-multi-accent-speech-recognition-2204.09883</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/layer-wise-fast-adaptation-for-end-to-end-multi-accent-speech-recognition-2204.09883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/layer-wise-fast-adaptation-for-end-to-end-multi-accent-speech-recognition-2204.09883"/></url>
<url><loc>https://scifaro.com/en/abs/stft-domain-neural-speech-enhancement-with-very-low-algorithmic-latency-2204.09911</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stft-domain-neural-speech-enhancement-with-very-low-algorithmic-latency-2204.09911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stft-domain-neural-speech-enhancement-with-very-low-algorithmic-latency-2204.09911"/></url>
<url><loc>https://scifaro.com/en/abs/sintra-learning-an-inspiration-model-from-a-single-multi-track-music-segment-2204.09917</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sintra-learning-an-inspiration-model-from-a-single-multi-track-music-segment-2204.09917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sintra-learning-an-inspiration-model-from-a-single-multi-track-music-segment-2204.09917"/></url>
<url><loc>https://scifaro.com/en/abs/baseline-systems-for-the-first-spoofing-aware-speaker-verification-challenge-score-and-embedding-fusion-2204.09976</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/baseline-systems-for-the-first-spoofing-aware-speaker-verification-challenge-score-and-embedding-fusion-2204.09976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/baseline-systems-for-the-first-spoofing-aware-speaker-verification-challenge-score-and-embedding-fusion-2204.09976"/></url>
<url><loc>https://scifaro.com/en/abs/physical-modeling-using-recurrent-neural-networks-with-fast-convolutional-layers-2204.10125</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physical-modeling-using-recurrent-neural-networks-with-fast-convolutional-layers-2204.10125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physical-modeling-using-recurrent-neural-networks-with-fast-convolutional-layers-2204.10125"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-cosine-and-plda-back-ends-for-speaker-verification-2204.10523</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-cosine-and-plda-back-ends-for-speaker-verification-2204.10523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-cosine-and-plda-back-ends-for-speaker-verification-2204.10523"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-rate-controllable-hifi-gan-using-feature-interpolation-2204.10561</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-rate-controllable-hifi-gan-using-feature-interpolation-2204.10561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-rate-controllable-hifi-gan-using-feature-interpolation-2204.10561"/></url>
<url><loc>https://scifaro.com/en/abs/fused-audio-instance-and-representation-for-respiratory-disease-detection-2204.10581</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fused-audio-instance-and-representation-for-respiratory-disease-detection-2204.10581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fused-audio-instance-and-representation-for-respiratory-disease-detection-2204.10581"/></url>
<url><loc>https://scifaro.com/en/abs/e2e-segmenter-joint-segmenting-and-decoding-for-long-form-asr-2204.10749</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e2e-segmenter-joint-segmenting-and-decoding-for-long-form-asr-2204.10749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e2e-segmenter-joint-segmenting-and-decoding-for-long-form-asr-2204.10749"/></url>
<url><loc>https://scifaro.com/en/abs/musical-stylistic-analysis-a-study-of-intervallic-transition-graphs-via-persistent-homology-2204.11139</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-stylistic-analysis-a-study-of-intervallic-transition-graphs-via-persistent-homology-2204.11139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-stylistic-analysis-a-study-of-intervallic-transition-graphs-via-persistent-homology-2204.11139"/></url>
<url><loc>https://scifaro.com/en/abs/dictionary-attacks-on-speaker-verification-2204.11304</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dictionary-attacks-on-speaker-verification-2204.11304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dictionary-attacks-on-speaker-verification-2204.11304"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-aware-transformer-encoder-for-empathetic-dialogue-generation-2204.11320</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-aware-transformer-encoder-for-empathetic-dialogue-generation-2204.11320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-aware-transformer-encoder-for-empathetic-dialogue-generation-2204.11320"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-emotion-recognition-based-on-syllable-level-feature-extraction-2204.11382</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-emotion-recognition-based-on-syllable-level-feature-extraction-2204.11382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-emotion-recognition-based-on-syllable-level-feature-extraction-2204.11382"/></url>
<url><loc>https://scifaro.com/en/abs/back-ends-selection-for-deep-speaker-embeddings-2204.11403</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/back-ends-selection-for-deep-speaker-embeddings-2204.11403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/back-ends-selection-for-deep-speaker-embeddings-2204.11403"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-audio-features-via-trainable-basis-functions-2204.11437</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-audio-features-via-trainable-basis-functions-2204.11437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-audio-features-via-trainable-basis-functions-2204.11437"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-audio-strikes-back-boosting-augmentations-towards-an-efficient-audio-classification-network-2204.11479</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-audio-strikes-back-boosting-augmentations-towards-an-efficient-audio-classification-network-2204.11479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-audio-strikes-back-boosting-augmentations-towards-an-efficient-audio-classification-network-2204.11479"/></url>
<url><loc>https://scifaro.com/en/abs/syntaspeech-syntax-aware-generative-adversarial-text-to-speech-2204.11792</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syntaspeech-syntax-aware-generative-adversarial-text-to-speech-2204.11792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syntaspeech-syntax-aware-generative-adversarial-text-to-speech-2204.11792"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-synthesis-for-autoregressive-speech-generation-2204.11806</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-synthesis-for-autoregressive-speech-generation-2204.11806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-synthesis-for-autoregressive-speech-generation-2204.11806"/></url>
<url><loc>https://scifaro.com/en/abs/meta-af-meta-learning-for-adaptive-filters-2204.11942</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-af-meta-learning-for-adaptive-filters-2204.11942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-af-meta-learning-for-adaptive-filters-2204.11942"/></url>
<url><loc>https://scifaro.com/en/abs/reformulating-speaker-diarization-as-community-detection-with-emphasis-on-topological-structure-2204.12112</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reformulating-speaker-diarization-as-community-detection-with-emphasis-on-topological-structure-2204.12112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reformulating-speaker-diarization-as-community-detection-with-emphasis-on-topological-structure-2204.12112"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-approaches-to-acoustic-scene-classification-using-cnns-2204.12177</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-approaches-to-acoustic-scene-classification-using-cnns-2204.12177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-approaches-to-acoustic-scene-classification-using-cnns-2204.12177"/></url>
<url><loc>https://scifaro.com/en/abs/on-machine-learning-driven-surrogates-for-sound-transmission-loss-simulations-2204.12290</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-machine-learning-driven-surrogates-for-sound-transmission-loss-simulations-2204.12290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-machine-learning-driven-surrogates-for-sound-transmission-loss-simulations-2204.12290"/></url>
<url><loc>https://scifaro.com/en/abs/measurement-uncertainty-and-unicity-of-single-number-quantities-describing-the-spatial-decay-of-speech-level-in-open-plan-offices-2204.12486</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measurement-uncertainty-and-unicity-of-single-number-quantities-describing-the-spatial-decay-of-speech-level-in-open-plan-offices-2204.12486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measurement-uncertainty-and-unicity-of-single-number-quantities-describing-the-spatial-decay-of-speech-level-in-open-plan-offices-2204.12486"/></url>
<url><loc>https://scifaro.com/en/abs/named-entity-recognition-for-audio-de-identification-2204.12622</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/named-entity-recognition-for-audio-de-identification-2204.12622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/named-entity-recognition-for-audio-de-identification-2204.12622"/></url>
<url><loc>https://scifaro.com/en/abs/masked-spectrogram-prediction-for-self-supervised-audio-pre-training-2204.12768</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-spectrogram-prediction-for-self-supervised-audio-pre-training-2204.12768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-spectrogram-prediction-for-self-supervised-audio-pre-training-2204.12768"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-word-segmentation-using-k-nearest-neighbors-2204.13094</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-word-segmentation-using-k-nearest-neighbors-2204.13094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-word-segmentation-using-k-nearest-neighbors-2204.13094"/></url>
<url><loc>https://scifaro.com/en/abs/improving-multimodal-speech-recognition-by-data-augmentation-and-speech-representations-2204.13206</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-multimodal-speech-recognition-by-data-augmentation-and-speech-representations-2204.13206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-multimodal-speech-recognition-by-data-augmentation-and-speech-representations-2204.13206"/></url>
<url><loc>https://scifaro.com/en/abs/music-enhancement-via-image-translation-and-vocoding-2204.13289</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-enhancement-via-image-translation-and-vocoding-2204.13289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-enhancement-via-image-translation-and-vocoding-2204.13289"/></url>
<url><loc>https://scifaro.com/en/abs/pseudo-strong-labels-for-large-scale-weakly-supervised-audio-tagging-2204.13430</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseudo-strong-labels-for-large-scale-weakly-supervised-audio-tagging-2204.13430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseudo-strong-labels-for-large-scale-weakly-supervised-audio-tagging-2204.13430"/></url>
<url><loc>https://scifaro.com/en/abs/regotron-regularizing-the-tacotron2-architecture-via-monotonic-alignment-loss-2204.13437</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regotron-regularizing-the-tacotron2-architecture-via-monotonic-alignment-loss-2204.13437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regotron-regularizing-the-tacotron2-architecture-via-monotonic-alignment-loss-2204.13437"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-in-persian-speech-using-deep-neural-networks-2204.13601</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-in-persian-speech-using-deep-neural-networks-2204.13601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-in-persian-speech-using-deep-neural-networks-2204.13601"/></url>
<url><loc>https://scifaro.com/en/abs/unaligned-supervision-for-automatic-music-transcription-in-the-wild-2204.13668</loc><lastmod>2022-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unaligned-supervision-for-automatic-music-transcription-in-the-wild-2204.13668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unaligned-supervision-for-automatic-music-transcription-in-the-wild-2204.13668"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-voice-face-representation-learning-by-cross-modal-prototype-contrast-2204.14057</loc><lastmod>2022-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-voice-face-representation-learning-by-cross-modal-prototype-contrast-2204.14057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-voice-face-representation-learning-by-cross-modal-prototype-contrast-2204.14057"/></url>
<url><loc>https://scifaro.com/en/abs/taylor-can-you-hear-me-now-a-taylor-unfolding-framework-for-monaural-speech-enhancement-2205.00206</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taylor-can-you-hear-me-now-a-taylor-unfolding-framework-for-monaural-speech-enhancement-2205.00206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taylor-can-you-hear-me-now-a-taylor-unfolding-framework-for-monaural-speech-enhancement-2205.00206"/></url>
<url><loc>https://scifaro.com/en/abs/relation-guided-acoustic-scene-classification-aided-with-event-embeddings-2205.00499</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relation-guided-acoustic-scene-classification-aided-with-event-embeddings-2205.00499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relation-guided-acoustic-scene-classification-aided-with-event-embeddings-2205.00499"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-speech-driven-lip-sync-model-with-cnn-and-lstm-2205.00916</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-speech-driven-lip-sync-model-with-cnn-and-lstm-2205.00916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-speech-driven-lip-sync-model-with-cnn-and-lstm-2205.00916"/></url>
<url><loc>https://scifaro.com/en/abs/music-interpretation-analysis-a-multimodal-approach-to-score-informed-resynthesis-of-piano-recordings-2205.00941</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-interpretation-analysis-a-multimodal-approach-to-score-informed-resynthesis-of-piano-recordings-2205.00941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-interpretation-analysis-a-multimodal-approach-to-score-informed-resynthesis-of-piano-recordings-2205.00941"/></url>
<url><loc>https://scifaro.com/en/abs/harmof0-logarithmic-scale-dilated-convolution-for-pitch-estimation-2205.01019</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmof0-logarithmic-scale-dilated-convolution-for-pitch-estimation-2205.01019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmof0-logarithmic-scale-dilated-convolution-for-pitch-estimation-2205.01019"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-musical-source-separation-2205.01273</loc><lastmod>2022-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-musical-source-separation-2205.01273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-musical-source-separation-2205.01273"/></url>
<url><loc>https://scifaro.com/en/abs/on-monoaural-speech-enhancement-for-automatic-recognition-of-real-noisy-speech-using-mixture-invariant-training-2205.01751</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-monoaural-speech-enhancement-for-automatic-recognition-of-real-noisy-speech-using-mixture-invariant-training-2205.01751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-monoaural-speech-enhancement-for-automatic-recognition-of-real-noisy-speech-using-mixture-invariant-training-2205.01751"/></url>
<url><loc>https://scifaro.com/en/abs/synthesized-speech-detection-using-convolutional-transformer-based-spectrogram-analysis-2205.01800</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesized-speech-detection-using-convolutional-transformer-based-spectrogram-analysis-2205.01800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesized-speech-detection-using-convolutional-transformer-based-spectrogram-analysis-2205.01800"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-based-detection-of-generated-audio-2205.01806</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-based-detection-of-generated-audio-2205.01806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-based-detection-of-generated-audio-2205.01806"/></url>
<url><loc>https://scifaro.com/en/abs/svts-scalable-video-to-speech-synthesis-2205.02058</loc><lastmod>2022-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svts-scalable-video-to-speech-synthesis-2205.02058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svts-scalable-video-to-speech-synthesis-2205.02058"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-in-the-wild-2205.02475</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-in-the-wild-2205.02475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-in-the-wild-2205.02475"/></url>
<url><loc>https://scifaro.com/en/abs/m2r2-missing-modality-robust-emotion-recognition-framework-with-iterative-data-augmentation-2205.02524</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2r2-missing-modality-robust-emotion-recognition-framework-with-iterative-data-augmentation-2205.02524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2r2-missing-modality-robust-emotion-recognition-framework-with-iterative-data-augmentation-2205.02524"/></url>
<url><loc>https://scifaro.com/en/abs/sound2synth-interpreting-sound-via-fm-synthesizer-parameters-estimation-2205.03043</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound2synth-interpreting-sound-via-fm-synthesizer-parameters-estimation-2205.03043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound2synth-interpreting-sound-via-fm-synthesizer-parameters-estimation-2205.03043"/></url>
<url><loc>https://scifaro.com/en/abs/musical-score-following-and-audio-alignment-2205.03247</loc><lastmod>2022-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-score-following-and-audio-alignment-2205.03247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-score-following-and-audio-alignment-2205.03247"/></url>
<url><loc>https://scifaro.com/en/abs/robustness-of-neural-architectures-for-audio-event-detection-2205.03268</loc><lastmod>2022-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustness-of-neural-architectures-for-audio-event-detection-2205.03268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustness-of-neural-architectures-for-audio-event-detection-2205.03268"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-multi-aspect-multi-granularity-non-native-english-speaker-pronunciation-assessment-2205.03432</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-multi-aspect-multi-granularity-non-native-english-speaker-pronunciation-assessment-2205.03432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-multi-aspect-multi-granularity-non-native-english-speaker-pronunciation-assessment-2205.03432"/></url>
<url><loc>https://scifaro.com/en/abs/vocalsound-a-dataset-for-improving-human-vocal-sounds-recognition-2205.03433</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocalsound-a-dataset-for-improving-human-vocal-sounds-recognition-2205.03433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocalsound-a-dataset-for-improving-human-vocal-sounds-recognition-2205.03433"/></url>
<url><loc>https://scifaro.com/en/abs/muskits-an-end-to-end-music-processing-toolkit-for-singing-voice-synthesis-2205.04029</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muskits-an-end-to-end-music-processing-toolkit-for-singing-voice-synthesis-2205.04029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muskits-an-end-to-end-music-processing-toolkit-for-singing-voice-synthesis-2205.04029"/></url>
<url><loc>https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-non-autoregressive-text-to-speech-2205.04120</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-non-autoregressive-text-to-speech-2205.04120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-non-autoregressive-text-to-speech-2205.04120"/></url>
<url><loc>https://scifaro.com/en/abs/insights-on-modelling-physiological-appraisal-and-affective-indicators-of-stress-using-audio-features-2205.04328</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/insights-on-modelling-physiological-appraisal-and-affective-indicators-of-stress-using-audio-features-2205.04328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/insights-on-modelling-physiological-appraisal-and-affective-indicators-of-stress-using-audio-features-2205.04328"/></url>
<url><loc>https://scifaro.com/en/abs/fatigue-prediction-in-outdoor-running-conditions-using-audio-data-2205.04343</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fatigue-prediction-in-outdoor-running-conditions-using-audio-data-2205.04343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fatigue-prediction-in-outdoor-running-conditions-using-audio-data-2205.04343"/></url>
<url><loc>https://scifaro.com/en/abs/gamified-speaker-comparison-by-listening-2205.04923</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gamified-speaker-comparison-by-listening-2205.04923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gamified-speaker-comparison-by-listening-2205.04923"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-fast-multichannel-nonnegative-matrix-factorization-based-on-gaussian-scale-mixtures-for-blind-source-separation-2205.05330</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-fast-multichannel-nonnegative-matrix-factorization-based-on-gaussian-scale-mixtures-for-blind-source-separation-2205.05330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-fast-multichannel-nonnegative-matrix-factorization-based-on-gaussian-scale-mixtures-for-blind-source-separation-2205.05330"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-the-status-quo-a-contemporary-survey-of-advances-and-challenges-in-audio-captioning-2205.05357</loc><lastmod>2023-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-the-status-quo-a-contemporary-survey-of-advances-and-challenges-in-audio-captioning-2205.05357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-the-status-quo-a-contemporary-survey-of-advances-and-challenges-in-audio-captioning-2205.05357"/></url>
<url><loc>https://scifaro.com/en/abs/symphony-generation-with-permutation-invariant-language-model-2205.05448</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symphony-generation-with-permutation-invariant-language-model-2205.05448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symphony-generation-with-permutation-invariant-language-model-2205.05448"/></url>
<url><loc>https://scifaro.com/en/abs/scream-detection-in-heavy-metal-music-2205.05580</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scream-detection-in-heavy-metal-music-2205.05580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scream-detection-in-heavy-metal-music-2205.05580"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-unsupervised-disentanglement-of-sequential-data-a-case-study-using-music-audio-2205.05871</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-unsupervised-disentanglement-of-sequential-data-a-case-study-using-music-audio-2205.05871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-unsupervised-disentanglement-of-sequential-data-a-case-study-using-music-audio-2205.05871"/></url>
<url><loc>https://scifaro.com/en/abs/unified-source-filter-gan-with-harmonic-plus-noise-source-excitation-generation-2205.06053</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-source-filter-gan-with-harmonic-plus-noise-source-excitation-generation-2205.06053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-source-filter-gan-with-harmonic-plus-noise-source-excitation-generation-2205.06053"/></url>
<url><loc>https://scifaro.com/en/abs/data-aided-underwater-acoustic-ray-propagation-modeling-2205.06066</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-aided-underwater-acoustic-ray-propagation-modeling-2205.06066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-aided-underwater-acoustic-ray-propagation-modeling-2205.06066"/></url>
<url><loc>https://scifaro.com/en/abs/the-acm-multimedia-2022-computational-paralinguistics-challenge-vocalisations-stuttering-activity-mosquitoes-2205.06799</loc><lastmod>2022-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-acm-multimedia-2022-computational-paralinguistics-challenge-vocalisations-stuttering-activity-mosquitoes-2205.06799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-acm-multimedia-2022-computational-paralinguistics-challenge-vocalisations-stuttering-activity-mosquitoes-2205.06799"/></url>
<url><loc>https://scifaro.com/en/abs/cmelgan-an-efficient-conditional-generative-model-based-on-mel-spectrograms-2205.07319</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cmelgan-an-efficient-conditional-generative-model-based-on-mel-spectrograms-2205.07319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cmelgan-an-efficient-conditional-generative-model-based-on-mel-spectrograms-2205.07319"/></url>
<url><loc>https://scifaro.com/en/abs/prism-pre-trained-indeterminate-speaker-representation-model-for-speaker-diarization-and-speaker-verification-2205.07450</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prism-pre-trained-indeterminate-speaker-representation-model-for-speaker-diarization-and-speaker-verification-2205.07450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prism-pre-trained-indeterminate-speaker-representation-model-for-speaker-diarization-and-speaker-verification-2205.07450"/></url>
<url><loc>https://scifaro.com/en/abs/l3-net-deep-audio-embeddings-to-improve-covid-19-detection-from-smartphone-data-2205.07682</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l3-net-deep-audio-embeddings-to-improve-covid-19-detection-from-smartphone-data-2205.07682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l3-net-deep-audio-embeddings-to-improve-covid-19-detection-from-smartphone-data-2205.07682"/></url>
<url><loc>https://scifaro.com/en/abs/transferability-of-adversarial-attacks-on-synthetic-speech-detection-2205.07711</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferability-of-adversarial-attacks-on-synthetic-speech-detection-2205.07711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferability-of-adversarial-attacks-on-synthetic-speech-detection-2205.07711"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-weighted-multi-dilation-temporal-convolutional-networks-for-monaural-speech-dereverberation-2205.08455</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-weighted-multi-dilation-temporal-convolutional-networks-for-monaural-speech-dereverberation-2205.08455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-weighted-multi-dilation-temporal-convolutional-networks-for-monaural-speech-dereverberation-2205.08455"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-recognition-of-speakers-for-consent-management-by-contrastive-embedding-replay-2205.08459</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-recognition-of-speakers-for-consent-management-by-contrastive-embedding-replay-2205.08459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-recognition-of-speakers-for-consent-management-by-contrastive-embedding-replay-2205.08459"/></url>
<url><loc>https://scifaro.com/en/abs/the-power-of-fragmentation-a-hierarchical-transformer-model-for-structural-segmentation-in-symbolic-music-generation-2205.08579</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-power-of-fragmentation-a-hierarchical-transformer-model-for-structural-segmentation-in-symbolic-music-generation-2205.08579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-power-of-fragmentation-a-hierarchical-transformer-model-for-structural-segmentation-in-symbolic-music-generation-2205.08579"/></url>
<url><loc>https://scifaro.com/en/abs/deploying-self-supervised-learning-in-the-wild-for-hybrid-automatic-speech-recognition-2205.08598</loc><lastmod>2022-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deploying-self-supervised-learning-in-the-wild-for-hybrid-automatic-speech-recognition-2205.08598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deploying-self-supervised-learning-in-the-wild-for-hybrid-automatic-speech-recognition-2205.08598"/></url>
<url><loc>https://scifaro.com/en/abs/mesh2ir-neural-acoustic-impulse-response-generator-for-complex-3d-scenes-2205.09248</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mesh2ir-neural-acoustic-impulse-response-generator-for-complex-3d-scenes-2205.09248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mesh2ir-neural-acoustic-impulse-response-generator-for-complex-3d-scenes-2205.09248"/></url>
<url><loc>https://scifaro.com/en/abs/the-ai-mechanic-acoustic-vehicle-characterization-neural-networks-2205.09667</loc><lastmod>2022-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ai-mechanic-acoustic-vehicle-characterization-neural-networks-2205.09667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ai-mechanic-acoustic-vehicle-characterization-neural-networks-2205.09667"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-binary-time-frequency-masks-from-ambient-noise-2205.10205</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-binary-time-frequency-masks-from-ambient-noise-2205.10205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-binary-time-frequency-masks-from-ambient-noise-2205.10205"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-offsets-multilateration-a-new-paradigm-for-sensor-network-calibration-with-unsynchronized-reference-nodes-2205.11299</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-offsets-multilateration-a-new-paradigm-for-sensor-network-calibration-with-unsynchronized-reference-nodes-2205.11299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-offsets-multilateration-a-new-paradigm-for-sensor-network-calibration-with-unsynchronized-reference-nodes-2205.11299"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-few-shot-learning-algorithm-for-rare-sound-event-detection-2205.11738</loc><lastmod>2022-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-few-shot-learning-algorithm-for-rare-sound-event-detection-2205.11738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-few-shot-learning-algorithm-for-rare-sound-event-detection-2205.11738"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-automated-classification-of-chinese-speech-sound-disorders-2205.11748</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-automated-classification-of-chinese-speech-sound-disorders-2205.11748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-automated-classification-of-chinese-speech-sound-disorders-2205.11748"/></url>
<url><loc>https://scifaro.com/en/abs/singer-identification-for-metaverse-with-timbral-and-middle-level-perceptual-features-2205.11817</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singer-identification-for-metaverse-with-timbral-and-middle-level-perceptual-features-2205.11817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singer-identification-for-metaverse-with-timbral-and-middle-level-perceptual-features-2205.11817"/></url>
<url><loc>https://scifaro.com/en/abs/metasid-singer-identification-with-domain-adaptation-for-metaverse-2205.11821</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metasid-singer-identification-with-domain-adaptation-for-metaverse-2205.11821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metasid-singer-identification-with-domain-adaptation-for-metaverse-2205.11821"/></url>
<url><loc>https://scifaro.com/en/abs/tdass-target-domain-adaptation-speech-synthesis-framework-for-multi-speaker-low-resource-tts-2205.11824</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tdass-target-domain-adaptation-speech-synthesis-framework-for-multi-speaker-low-resource-tts-2205.11824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tdass-target-domain-adaptation-speech-synthesis-framework-for-multi-speaker-low-resource-tts-2205.11824"/></url>
<url><loc>https://scifaro.com/en/abs/susing-su-net-for-singing-voice-synthesis-2205.11841</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/susing-su-net-for-singing-voice-synthesis-2205.11841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/susing-su-net-for-singing-voice-synthesis-2205.11841"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-reservoir-computing-models-for-persian-speech-recognition-2205.12594</loc><lastmod>2022-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-reservoir-computing-models-for-persian-speech-recognition-2205.12594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-reservoir-computing-models-for-persian-speech-recognition-2205.12594"/></url>
<url><loc>https://scifaro.com/en/abs/dt-sv-a-transformer-based-time-domain-approach-for-speaker-verification-2205.13249</loc><lastmod>2022-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dt-sv-a-transformer-based-time-domain-approach-for-speaker-verification-2205.13249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dt-sv-a-transformer-based-time-domain-approach-for-speaker-verification-2205.13249"/></url>
<url><loc>https://scifaro.com/en/abs/mimii-dg-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-for-domain-generalization-task-2205.13879</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimii-dg-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-for-domain-generalization-task-2205.13879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimii-dg-sound-dataset-for-malfunctioning-industrial-machine-investigation-and-inspection-for-domain-generalization-task-2205.13879"/></url>
<url><loc>https://scifaro.com/en/abs/speech-augmentation-based-unsupervised-learning-for-keyword-spotting-2205.14329</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-augmentation-based-unsupervised-learning-for-keyword-spotting-2205.14329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-augmentation-based-unsupervised-learning-for-keyword-spotting-2205.14329"/></url>
<url><loc>https://scifaro.com/en/abs/feature-pyramid-attention-based-residual-neural-network-for-environmental-sound-classification-2205.14411</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-pyramid-attention-based-residual-neural-network-for-environmental-sound-classification-2205.14411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-pyramid-attention-based-residual-neural-network-for-environmental-sound-classification-2205.14411"/></url>
<url><loc>https://scifaro.com/en/abs/supervoice-text-independent-speaker-verification-using-ultrasound-energy-in-human-speech-2205.14496</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervoice-text-independent-speaker-verification-using-ultrasound-energy-in-human-speech-2205.14496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervoice-text-independent-speaker-verification-using-ultrasound-energy-in-human-speech-2205.14496"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-using-speech-recognition-2205.14649</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-using-speech-recognition-2205.14649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-using-speech-recognition-2205.14649"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-beats-and-downbeats-with-a-time-frequency-transformer-2205.14701</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-beats-and-downbeats-with-a-time-frequency-transformer-2205.14701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-beats-and-downbeats-with-a-time-frequency-transformer-2205.14701"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-acoustic-echo-cancellation-for-full-duplex-communications-2205.15195</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-acoustic-echo-cancellation-for-full-duplex-communications-2205.15195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-acoustic-echo-cancellation-for-full-duplex-communications-2205.15195"/></url>
<url><loc>https://scifaro.com/en/abs/ai-enabled-sound-pattern-recognition-on-asthma-medication-adherence-evaluation-with-the-rda-benchmark-suite-2205.15360</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-enabled-sound-pattern-recognition-on-asthma-medication-adherence-evaluation-with-the-rda-benchmark-suite-2205.15360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-enabled-sound-pattern-recognition-on-asthma-medication-adherence-evaluation-with-the-rda-benchmark-suite-2205.15360"/></url>
<url><loc>https://scifaro.com/en/abs/guided-tts-2-a-diffusion-model-for-high-quality-adaptive-text-to-speech-with-untranscribed-data-2205.15370</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-tts-2-a-diffusion-model-for-high-quality-adaptive-text-to-speech-with-untranscribed-data-2205.15370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-tts-2-a-diffusion-model-for-high-quality-adaptive-text-to-speech-with-untranscribed-data-2205.15370"/></url>
<url><loc>https://scifaro.com/en/abs/adavits-tiny-vits-for-low-computing-resource-speaker-adaptation-2206.00208</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adavits-tiny-vits-for-low-computing-resource-speaker-adaptation-2206.00208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adavits-tiny-vits-for-low-computing-resource-speaker-adaptation-2206.00208"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generalisable-audio-representations-for-audio-visual-navigation-2206.00393</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generalisable-audio-representations-for-audio-visual-navigation-2206.00393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generalisable-audio-representations-for-audio-visual-navigation-2206.00393"/></url>
<url><loc>https://scifaro.com/en/abs/towards-context-aware-neural-performance-score-synchronisation-2206.00454</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-context-aware-neural-performance-score-synchronisation-2206.00454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-context-aware-neural-performance-score-synchronisation-2206.00454"/></url>
<url><loc>https://scifaro.com/en/abs/speech-artifact-removal-from-eeg-recordings-of-spoken-word-production-with-tensor-decomposition-2206.00635</loc><lastmod>2022-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-artifact-removal-from-eeg-recordings-of-spoken-word-production-with-tensor-decomposition-2206.00635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-artifact-removal-from-eeg-recordings-of-spoken-word-production-with-tensor-decomposition-2206.00635"/></url>
<url><loc>https://scifaro.com/en/abs/musical-instrument-recognition-by-xgboost-combining-feature-fusion-2206.00901</loc><lastmod>2022-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-instrument-recognition-by-xgboost-combining-feature-fusion-2206.00901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-instrument-recognition-by-xgboost-combining-feature-fusion-2206.00901"/></url>
<url><loc>https://scifaro.com/en/abs/partitura-a-python-package-for-symbolic-music-processing-2206.01071</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partitura-a-python-package-for-symbolic-music-processing-2206.01071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partitura-a-python-package-for-symbolic-music-processing-2206.01071"/></url>
<url><loc>https://scifaro.com/en/abs/the-match-file-format-encoding-alignments-between-scores-and-performances-2206.01104</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-match-file-format-encoding-alignments-between-scores-and-performances-2206.01104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-match-file-format-encoding-alignments-between-scores-and-performances-2206.01104"/></url>
<url><loc>https://scifaro.com/en/abs/the-musical-arrow-of-time-the-role-of-temporal-asymmetry-in-music-and-its-organicist-implications-2206.01305</loc><lastmod>2022-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-musical-arrow-of-time-the-role-of-temporal-asymmetry-in-music-and-its-organicist-implications-2206.01305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-musical-arrow-of-time-the-role-of-temporal-asymmetry-in-music-and-its-organicist-implications-2206.01305"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-the-severity-of-major-depressive-disorder-from-speech-a-novel-hard-training-methodology-2206.01542</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-the-severity-of-major-depressive-disorder-from-speech-a-novel-hard-training-methodology-2206.01542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-the-severity-of-major-depressive-disorder-from-speech-a-novel-hard-training-methodology-2206.01542"/></url>
<url><loc>https://scifaro.com/en/abs/variable-rate-hierarchical-cpc-leads-to-acoustic-unit-discovery-in-speech-2206.02211</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variable-rate-hierarchical-cpc-leads-to-acoustic-unit-discovery-in-speech-2206.02211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variable-rate-hierarchical-cpc-leads-to-acoustic-unit-discovery-in-speech-2206.02211"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-voice-conditioning-for-denoising-diffusion-tts-models-2206.02246</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-voice-conditioning-for-denoising-diffusion-tts-models-2206.02246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-voice-conditioning-for-denoising-diffusion-tts-models-2206.02246"/></url>
<url><loc>https://scifaro.com/en/abs/tagged-mri-sequence-to-audio-synthesis-via-self-residual-attention-guided-heterogeneous-translator-2206.02284</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tagged-mri-sequence-to-audio-synthesis-via-self-residual-attention-guided-heterogeneous-translator-2206.02284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tagged-mri-sequence-to-audio-synthesis-via-self-residual-attention-guided-heterogeneous-translator-2206.02284"/></url>
<url><loc>https://scifaro.com/en/abs/canonical-cortical-graph-neural-networks-and-its-application-for-speech-enhancement-in-audio-visual-hearing-aids-2206.02671</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/canonical-cortical-graph-neural-networks-and-its-application-for-speech-enhancement-in-audio-visual-hearing-aids-2206.02671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/canonical-cortical-graph-neural-networks-and-its-application-for-speech-enhancement-in-audio-visual-hearing-aids-2206.02671"/></url>
<url><loc>https://scifaro.com/en/abs/universal-speech-enhancement-with-score-based-diffusion-2206.03065</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-speech-enhancement-with-score-based-diffusion-2206.03065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-speech-enhancement-with-score-based-diffusion-2206.03065"/></url>
<url><loc>https://scifaro.com/en/abs/as2t-arbitrary-source-to-target-adversarial-attack-on-speaker-recognition-systems-2206.03351</loc><lastmod>2022-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/as2t-arbitrary-source-to-target-adversarial-attack-on-speaker-recognition-systems-2206.03351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/as2t-arbitrary-source-to-target-adversarial-attack-on-speaker-recognition-systems-2206.03351"/></url>
<url><loc>https://scifaro.com/en/abs/towards-understanding-and-mitigating-audio-adversarial-examples-for-speaker-recognition-2206.03393</loc><lastmod>2022-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-understanding-and-mitigating-audio-adversarial-examples-for-speaker-recognition-2206.03393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-understanding-and-mitigating-audio-adversarial-examples-for-speaker-recognition-2206.03393"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-audio-visual-learning-of-environment-acoustics-2206.04006</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-audio-visual-learning-of-environment-acoustics-2206.04006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-audio-visual-learning-of-environment-acoustics-2206.04006"/></url>
<url><loc>https://scifaro.com/en/abs/bigvgan-a-universal-neural-vocoder-with-large-scale-training-2206.04658</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigvgan-a-universal-neural-vocoder-with-large-scale-training-2206.04658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigvgan-a-universal-neural-vocoder-with-large-scale-training-2206.04658"/></url>
<url><loc>https://scifaro.com/en/abs/clap-learning-audio-concepts-from-natural-language-supervision-2206.04769</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clap-learning-audio-concepts-from-natural-language-supervision-2206.04769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clap-learning-audio-concepts-from-natural-language-supervision-2206.04769"/></url>
<url><loc>https://scifaro.com/en/abs/speak-like-a-dog-human-to-non-human-creature-voice-conversion-2206.04780</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-like-a-dog-human-to-non-human-creature-voice-conversion-2206.04780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-like-a-dog-human-to-non-human-creature-voice-conversion-2206.04780"/></url>
<url><loc>https://scifaro.com/en/abs/motif-mining-and-unsupervised-representation-learning-for-birdclef-2022-2206.04805</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/motif-mining-and-unsupervised-representation-learning-for-birdclef-2022-2206.04805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/motif-mining-and-unsupervised-representation-learning-for-birdclef-2022-2206.04805"/></url>
<url><loc>https://scifaro.com/en/abs/feature-learning-and-ensemble-pre-tasks-based-self-supervised-speech-denoising-and-dereverberation-2206.04962</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-learning-and-ensemble-pre-tasks-based-self-supervised-speech-denoising-and-dereverberation-2206.04962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-learning-and-ensemble-pre-tasks-based-self-supervised-speech-denoising-and-dereverberation-2206.04962"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-classification-using-image-embeddings-2206.04984</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-classification-using-image-embeddings-2206.04984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-classification-using-image-embeddings-2206.04984"/></url>
<url><loc>https://scifaro.com/en/abs/going-beyond-the-cookie-theft-picture-test-detecting-cognitive-impairments-using-acoustic-features-2206.05018</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/going-beyond-the-cookie-theft-picture-test-detecting-cognitive-impairments-using-acoustic-features-2206.05018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/going-beyond-the-cookie-theft-picture-test-detecting-cognitive-impairments-using-acoustic-features-2206.05018"/></url>
<url><loc>https://scifaro.com/en/abs/ahd-convnet-for-speech-emotion-classification-2206.05286</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ahd-convnet-for-speech-emotion-classification-2206.05286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ahd-convnet-for-speech-emotion-classification-2206.05286"/></url>
<url><loc>https://scifaro.com/en/abs/multi-instrument-music-synthesis-with-spectrogram-diffusion-2206.05408</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-instrument-music-synthesis-with-spectrogram-diffusion-2206.05408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-instrument-music-synthesis-with-spectrogram-diffusion-2206.05408"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-discussion-on-dcase-2022-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-applying-domain-generalization-techniques-2206.05876</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2022-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-applying-domain-generalization-techniques-2206.05876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2022-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-applying-domain-generalization-techniques-2206.05876"/></url>
<url><loc>https://scifaro.com/en/abs/improvement-of-serial-approach-to-anomalous-sound-detection-by-incorporating-two-binary-cross-entropies-for-outlier-exposure-2206.05929</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvement-of-serial-approach-to-anomalous-sound-detection-by-incorporating-two-binary-cross-entropies-for-outlier-exposure-2206.05929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvement-of-serial-approach-to-anomalous-sound-detection-by-incorporating-two-binary-cross-entropies-for-outlier-exposure-2206.05929"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-2206.06057</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-2206.06057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-2206.06057"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-musical-chord-inversions-using-the-cartesian-coordinate-system-2206.06117</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-musical-chord-inversions-using-the-cartesian-coordinate-system-2206.06117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-musical-chord-inversions-using-the-cartesian-coordinate-system-2206.06117"/></url>
<url><loc>https://scifaro.com/en/abs/robust-time-series-denoising-with-learnable-wavelet-packet-transform-2206.06126</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-time-series-denoising-with-learnable-wavelet-packet-transform-2206.06126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-time-series-denoising-with-learnable-wavelet-packet-transform-2206.06126"/></url>
<url><loc>https://scifaro.com/en/abs/speech-intelligibility-of-simulated-hearing-loss-sounds-and-its-prediction-using-the-gammachirp-envelope-similarity-index-gesi-2206.06573</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-intelligibility-of-simulated-hearing-loss-sounds-and-its-prediction-using-the-gammachirp-envelope-similarity-index-gesi-2206.06573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-intelligibility-of-simulated-hearing-loss-sounds-and-its-prediction-using-the-gammachirp-envelope-similarity-index-gesi-2206.06573"/></url>
<url><loc>https://scifaro.com/en/abs/whis-hearing-impairment-simulator-based-on-the-gammachirp-auditory-filterbank-2206.06604</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whis-hearing-impairment-simulator-based-on-the-gammachirp-auditory-filterbank-2206.06604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whis-hearing-impairment-simulator-based-on-the-gammachirp-auditory-filterbank-2206.06604"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speaker-enrolment-for-few-shot-personalisation-in-emotional-vocalisation-prediction-2206.06680</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speaker-enrolment-for-few-shot-personalisation-in-emotional-vocalisation-prediction-2206.06680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speaker-enrolment-for-few-shot-personalisation-in-emotional-vocalisation-prediction-2206.06680"/></url>
<url><loc>https://scifaro.com/en/abs/lpcse-neural-speech-enhancement-through-linear-predictive-coding-2206.06908</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lpcse-neural-speech-enhancement-through-linear-predictive-coding-2206.06908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lpcse-neural-speech-enhancement-through-linear-predictive-coding-2206.06908"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-centroid-features-for-word-recognition-of-non-native-english-speakers-2206.07176</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-centroid-features-for-word-recognition-of-non-native-english-speakers-2206.07176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-centroid-features-for-word-recognition-of-non-native-english-speakers-2206.07176"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-emotion-strength-assessment-for-seen-and-unseen-speech-based-on-data-driven-deep-learning-2206.07229</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-emotion-strength-assessment-for-seen-and-unseen-speech-based-on-data-driven-deep-learning-2206.07229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-emotion-strength-assessment-for-seen-and-unseen-speech-based-on-data-driven-deep-learning-2206.07229"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-non-autoregressive-model-for-any-to-many-voice-conversion-2206.07288</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-non-autoregressive-model-for-any-to-many-voice-conversion-2206.07288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-non-autoregressive-model-for-any-to-many-voice-conversion-2206.07288"/></url>
<url><loc>https://scifaro.com/en/abs/text-aware-end-to-end-mispronunciation-detection-and-diagnosis-2206.07289</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-aware-end-to-end-mispronunciation-detection-and-diagnosis-2206.07289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-aware-end-to-end-mispronunciation-detection-and-diagnosis-2206.07289"/></url>
<url><loc>https://scifaro.com/en/abs/frcrn-boosting-feature-representation-using-frequency-recurrence-for-monaural-speech-enhancement-2206.07293</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frcrn-boosting-feature-representation-using-frequency-recurrence-for-monaural-speech-enhancement-2206.07293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frcrn-boosting-feature-representation-using-frequency-recurrence-for-monaural-speech-enhancement-2206.07293"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-design-and-training-strategies-for-rnn-based-online-neural-speech-separation-systems-2206.07340</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-design-and-training-strategies-for-rnn-based-online-neural-speech-separation-systems-2206.07340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-design-and-training-strategies-for-rnn-based-online-neural-speech-separation-systems-2206.07340"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-deep-mask-estimation-module-for-neural-source-separation-systems-2206.07347</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-deep-mask-estimation-module-for-neural-source-separation-systems-2206.07347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-deep-mask-estimation-module-for-neural-source-separation-systems-2206.07347"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-multi-feature-selection-and-ensembling-for-audio-classification-2206.07511</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-multi-feature-selection-and-ensembling-for-audio-classification-2206.07511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-multi-feature-selection-and-ensembling-for-audio-classification-2206.07511"/></url>
<url><loc>https://scifaro.com/en/abs/epg2s-speech-generation-and-speech-enhancement-based-on-electropalatography-and-audio-signals-using-multimodal-learning-2206.07860</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/epg2s-speech-generation-and-speech-enhancement-based-on-electropalatography-and-audio-signals-using-multimodal-learning-2206.07860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/epg2s-speech-generation-and-speech-enhancement-based-on-electropalatography-and-audio-signals-using-multimodal-learning-2206.07860"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-prosody-annotation-with-pre-trained-text-speech-model-2206.07956</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-prosody-annotation-with-pre-trained-text-speech-model-2206.07956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-prosody-annotation-with-pre-trained-text-speech-model-2206.07956"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2022-comparative-analysis-of-cnns-for-acoustic-scene-classification-under-low-complexity-considerations-2206.08007</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2022-comparative-analysis-of-cnns-for-acoustic-scene-classification-under-low-complexity-considerations-2206.08007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2022-comparative-analysis-of-cnns-for-acoustic-scene-classification-under-low-complexity-considerations-2206.08007"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-modeling-for-end-to-end-empathetic-dialogue-speech-synthesis-using-linguistic-and-prosodic-contexts-of-dialogue-history-2206.08039</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-modeling-for-end-to-end-empathetic-dialogue-speech-synthesis-using-linguistic-and-prosodic-contexts-of-dialogue-history-2206.08039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-modeling-for-end-to-end-empathetic-dialogue-speech-synthesis-using-linguistic-and-prosodic-contexts-of-dialogue-history-2206.08039"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-privacy-protection-on-speech-enhancement-2206.08170</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-privacy-protection-on-speech-enhancement-2206.08170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-privacy-protection-on-speech-enhancement-2206.08170"/></url>
<url><loc>https://scifaro.com/en/abs/censer-curriculum-semi-supervised-learning-for-speech-recognition-based-on-self-supervised-pre-training-2206.08189</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/censer-curriculum-semi-supervised-learning-for-speech-recognition-based-on-self-supervised-pre-training-2206.08189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/censer-curriculum-semi-supervised-learning-for-speech-recognition-based-on-self-supervised-pre-training-2206.08189"/></url>
<url><loc>https://scifaro.com/en/abs/event-related-data-conditioning-for-acoustic-event-classification-2206.08233</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/event-related-data-conditioning-for-acoustic-event-classification-2206.08233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/event-related-data-conditioning-for-acoustic-event-classification-2206.08233"/></url>
<url><loc>https://scifaro.com/en/abs/a-language-model-with-million-context-length-for-raw-audio-2206.08297</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-language-model-with-million-context-length-for-raw-audio-2206.08297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-language-model-with-million-context-length-for-raw-audio-2206.08297"/></url>
<url><loc>https://scifaro.com/en/abs/soundspaces-2-0-a-simulation-platform-for-visual-acoustic-learning-2206.08312</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundspaces-2-0-a-simulation-platform-for-visual-acoustic-learning-2206.08312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundspaces-2-0-a-simulation-platform-for-visual-acoustic-learning-2206.08312"/></url>
<url><loc>https://scifaro.com/en/abs/paraformer-fast-and-accurate-parallel-transformer-for-non-autoregressive-end-to-end-speech-recognition-2206.08317</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paraformer-fast-and-accurate-parallel-transformer-for-non-autoregressive-end-to-end-speech-recognition-2206.08317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paraformer-fast-and-accurate-parallel-transformer-for-non-autoregressive-end-to-end-speech-recognition-2206.08317"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-spoofing-aware-speaker-verification-with-multi-model-fusion-2206.09131</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-spoofing-aware-speaker-verification-with-multi-model-fusion-2206.09131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-spoofing-aware-speaker-verification-with-multi-model-fusion-2206.09131"/></url>
<url><loc>https://scifaro.com/en/abs/redundancy-reduction-twins-network-a-training-framework-for-multi-output-emotion-regression-2206.09142</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/redundancy-reduction-twins-network-a-training-framework-for-multi-output-emotion-regression-2206.09142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/redundancy-reduction-twins-network-a-training-framework-for-multi-output-emotion-regression-2206.09142"/></url>
<url><loc>https://scifaro.com/en/abs/gmm-based-multi-stage-wiener-filtering-for-low-snr-speech-enhancement-2206.09298</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gmm-based-multi-stage-wiener-filtering-for-low-snr-speech-enhancement-2206.09298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gmm-based-multi-stage-wiener-filtering-for-low-snr-speech-enhancement-2206.09298"/></url>
<url><loc>https://scifaro.com/en/abs/wolonet-wave-outlooker-for-efficient-and-high-fidelity-speech-synthesis-2206.09920</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wolonet-wave-outlooker-for-efficient-and-high-fidelity-speech-synthesis-2206.09920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wolonet-wave-outlooker-for-efficient-and-high-fidelity-speech-synthesis-2206.09920"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-grained-based-attention-network-for-semi-supervised-sound-event-detection-2206.10175</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-grained-based-attention-network-for-semi-supervised-sound-event-detection-2206.10175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-grained-based-attention-network-for-semi-supervised-sound-event-detection-2206.10175"/></url>
<url><loc>https://scifaro.com/en/abs/human-in-the-loop-speaker-adaptation-for-dnn-based-multi-speaker-tts-2206.10256</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-in-the-loop-speaker-adaptation-for-dnn-based-multi-speaker-tts-2206.10256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-in-the-loop-speaker-adaptation-for-dnn-based-multi-speaker-tts-2206.10256"/></url>
<url><loc>https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-based-on-multitask-learning-with-dynamic-weight-adaptation-2206.10349</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-based-on-multitask-learning-with-dynamic-weight-adaptation-2206.10349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-based-on-multitask-learning-with-dynamic-weight-adaptation-2206.10349"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-audio-visual-synchronization-for-active-speaker-detection-2206.10421</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-audio-visual-synchronization-for-active-speaker-detection-2206.10421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-audio-visual-synchronization-for-active-speaker-detection-2206.10421"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-effectiveness-of-self-supervised-learning-and-classifier-chains-in-emotion-recognition-of-nonverbal-vocalizations-2206.10695</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-effectiveness-of-self-supervised-learning-and-classifier-chains-in-emotion-recognition-of-nonverbal-vocalizations-2206.10695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-effectiveness-of-self-supervised-learning-and-classifier-chains-in-emotion-recognition-of-nonverbal-vocalizations-2206.10695"/></url>
<url><loc>https://scifaro.com/en/abs/jointist-joint-learning-for-multi-instrument-transcription-and-its-applications-2206.10805</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointist-joint-learning-for-multi-instrument-transcription-and-its-applications-2206.10805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointist-joint-learning-for-multi-instrument-transcription-and-its-applications-2206.10805"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-restrained-uncertainty-weighting-loss-for-multitask-learning-of-vocal-expression-2206.11049</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-restrained-uncertainty-weighting-loss-for-multitask-learning-of-vocal-expression-2206.11049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-restrained-uncertainty-weighting-loss-for-multitask-learning-of-vocal-expression-2206.11049"/></url>
<url><loc>https://scifaro.com/en/abs/radio2speech-high-quality-speech-recovery-from-radio-frequency-signals-2206.11066</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radio2speech-high-quality-speech-recovery-from-radio-frequency-signals-2206.11066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radio2speech-high-quality-speech-recovery-from-radio-frequency-signals-2206.11066"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-long-tailed-bird-audio-recognition-2206.11260</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-long-tailed-bird-audio-recognition-2206.11260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-long-tailed-bird-audio-recognition-2206.11260"/></url>
<url><loc>https://scifaro.com/en/abs/restoring-speech-intelligibility-for-hearing-aid-users-with-deep-learning-2206.11567</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/restoring-speech-intelligibility-for-hearing-aid-users-with-deep-learning-2206.11567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/restoring-speech-intelligibility-for-hearing-aid-users-with-deep-learning-2206.11567"/></url>
<url><loc>https://scifaro.com/en/abs/formant-estimation-and-tracking-using-probabilistic-heat-maps-2206.11632</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/formant-estimation-and-tracking-using-probabilistic-heat-maps-2206.11632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/formant-estimation-and-tracking-using-probabilistic-heat-maps-2206.11632"/></url>
<url><loc>https://scifaro.com/en/abs/towards-green-asr-lossless-4-bit-quantization-of-a-hybrid-tdnn-system-on-the-300-hr-switchboard-corpus-2206.11643</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-green-asr-lossless-4-bit-quantization-of-a-hybrid-tdnn-system-on-the-300-hr-switchboard-corpus-2206.11643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-green-asr-lossless-4-bit-quantization-of-a-hybrid-tdnn-system-on-the-300-hr-switchboard-corpus-2206.11643"/></url>
<url><loc>https://scifaro.com/en/abs/the-sjtu-x-lance-lab-system-for-cnsrc-2022-2206.11699</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sjtu-x-lance-lab-system-for-cnsrc-2022-2206.11699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sjtu-x-lance-lab-system-for-cnsrc-2022-2206.11699"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-supervised-and-self-supervised-embedding-for-exvo-multi-task-learning-track-2206.11968</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-supervised-and-self-supervised-embedding-for-exvo-multi-task-learning-track-2206.11968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-supervised-and-self-supervised-embedding-for-exvo-multi-task-learning-track-2206.11968"/></url>
<url><loc>https://scifaro.com/en/abs/byol-s-learning-self-supervised-speech-representations-by-bootstrapping-2206.12038</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/byol-s-learning-self-supervised-speech-representations-by-bootstrapping-2206.12038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/byol-s-learning-self-supervised-speech-representations-by-bootstrapping-2206.12038"/></url>
<url><loc>https://scifaro.com/en/abs/exact-prosody-cloning-in-zero-shot-multispeaker-text-to-speech-2206.12229</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exact-prosody-cloning-in-zero-shot-multispeaker-text-to-speech-2206.12229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exact-prosody-cloning-in-zero-shot-multispeaker-text-to-speech-2206.12229"/></url>
<url><loc>https://scifaro.com/en/abs/deformable-cnn-and-imbalance-aware-feature-learning-for-singing-technique-classification-2206.12230</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deformable-cnn-and-imbalance-aware-feature-learning-for-singing-technique-classification-2206.12230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deformable-cnn-and-imbalance-aware-feature-learning-for-singing-technique-classification-2206.12230"/></url>
<url><loc>https://scifaro.com/en/abs/pocap-corpus-a-multimodal-dataset-for-smart-operating-room-speech-assistant-using-interventional-radiology-workflow-analysis-2206.12320</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pocap-corpus-a-multimodal-dataset-for-smart-operating-room-speech-assistant-using-interventional-radiology-workflow-analysis-2206.12320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pocap-corpus-a-multimodal-dataset-for-smart-operating-room-speech-assistant-using-interventional-radiology-workflow-analysis-2206.12320"/></url>
<url><loc>https://scifaro.com/en/abs/burst2vec-an-adversarial-multi-task-approach-for-predicting-emotion-age-and-origin-from-vocal-bursts-2206.12469</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/burst2vec-an-adversarial-multi-task-approach-for-predicting-emotion-age-and-origin-from-vocal-bursts-2206.12469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/burst2vec-an-adversarial-multi-task-approach-for-predicting-emotion-age-and-origin-from-vocal-bursts-2206.12469"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-vocal-burst-modeling-with-resnets-and-pre-trained-paralinguistic-conformers-2206.12494</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-vocal-burst-modeling-with-resnets-and-pre-trained-paralinguistic-conformers-2206.12494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-vocal-burst-modeling-with-resnets-and-pre-trained-paralinguistic-conformers-2206.12494"/></url>
<url><loc>https://scifaro.com/en/abs/domain-generalization-with-relaxed-instance-frequency-wise-normalization-for-multi-device-acoustic-scene-classification-2206.12513</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-generalization-with-relaxed-instance-frequency-wise-normalization-for-multi-device-acoustic-scene-classification-2206.12513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-generalization-with-relaxed-instance-frequency-wise-normalization-for-multi-device-acoustic-scene-classification-2206.12513"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-context-aware-style-representation-for-expressive-speech-synthesis-2206.12559</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-context-aware-style-representation-for-expressive-speech-synthesis-2206.12559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-context-aware-style-representation-for-expressive-speech-synthesis-2206.12559"/></url>
<url><loc>https://scifaro.com/en/abs/generating-diverse-vocal-bursts-with-stylegan2-and-mel-spectrograms-2206.12563</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-diverse-vocal-bursts-with-stylegan2-and-mel-spectrograms-2206.12563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-diverse-vocal-bursts-with-stylegan2-and-mel-spectrograms-2206.12563"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervision-and-learnable-strfs-for-age-emotion-and-country-prediction-2206.12568</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervision-and-learnable-strfs-for-age-emotion-and-country-prediction-2206.12568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervision-and-learnable-strfs-for-age-emotion-and-country-prediction-2206.12568"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-personalized-non-speech-vocalization-from-discrete-speech-representations-2206.12662</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-personalized-non-speech-vocalization-from-discrete-speech-representations-2206.12662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-personalized-non-speech-vocalization-from-discrete-speech-representations-2206.12662"/></url>
<url><loc>https://scifaro.com/en/abs/on-comparison-of-encoders-for-attention-based-end-to-end-speech-recognition-in-standalone-and-rescoring-mode-2206.12829</loc><lastmod>2022-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-comparison-of-encoders-for-attention-based-end-to-end-speech-recognition-in-standalone-and-rescoring-mode-2206.12829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-comparison-of-encoders-for-attention-based-end-to-end-speech-recognition-in-standalone-and-rescoring-mode-2206.12829"/></url>
<url><loc>https://scifaro.com/en/abs/speak-like-a-professional-increasing-speech-intelligibility-by-mimicking-professional-announcer-voice-with-voice-conversion-2206.13021</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-like-a-professional-increasing-speech-intelligibility-by-mimicking-professional-announcer-voice-with-voice-conversion-2206.13021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-like-a-professional-increasing-speech-intelligibility-by-mimicking-professional-announcer-voice-with-voice-conversion-2206.13021"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-calibration-for-deep-audio-classifiers-2206.13071</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-calibration-for-deep-audio-classifiers-2206.13071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-calibration-for-deep-audio-classifiers-2206.13071"/></url>
<url><loc>https://scifaro.com/en/abs/sound-model-factory-an-integrated-system-architecture-for-generative-audio-modelling-2206.13085</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-model-factory-an-integrated-system-architecture-for-generative-audio-modelling-2206.13085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-model-factory-an-integrated-system-architecture-for-generative-audio-modelling-2206.13085"/></url>
<url><loc>https://scifaro.com/en/abs/speecheq-speech-emotion-recognition-based-on-multi-scale-unified-datasets-and-multitask-learning-2206.13101</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speecheq-speech-emotion-recognition-based-on-multi-scale-unified-datasets-and-multitask-learning-2206.13101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speecheq-speech-emotion-recognition-based-on-multi-scale-unified-datasets-and-multitask-learning-2206.13101"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-level-speaker-change-detection-with-difference-based-continuous-integrate-and-fire-2206.13110</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-level-speaker-change-detection-with-difference-based-continuous-integrate-and-fire-2206.13110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-level-speaker-change-detection-with-difference-based-continuous-integrate-and-fire-2206.13110"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-full-band-speech-enhancement-model-with-effective-spectral-compression-mapping-2206.13136</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-full-band-speech-enhancement-model-with-effective-spectral-compression-mapping-2206.13136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-full-band-speech-enhancement-model-with-effective-spectral-compression-mapping-2206.13136"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-acoustic-event-tagging-on-scene-classification-in-a-multi-task-learning-framework-2206.13476</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-acoustic-event-tagging-on-scene-classification-in-a-multi-task-learning-framework-2206.13476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-acoustic-event-tagging-on-scene-classification-in-a-multi-task-learning-framework-2206.13476"/></url>
<url><loc>https://scifaro.com/en/abs/clearbuds-wireless-binaural-earbuds-for-learning-based-speech-enhancement-2206.13611</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clearbuds-wireless-binaural-earbuds-for-learning-based-speech-enhancement-2206.13611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clearbuds-wireless-binaural-earbuds-for-learning-based-speech-enhancement-2206.13611"/></url>
<url><loc>https://scifaro.com/en/abs/tiny-sepformer-a-tiny-time-domain-transformer-network-for-speech-separation-2206.13689</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiny-sepformer-a-tiny-time-domain-transformer-network-for-speech-separation-2206.13689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiny-sepformer-a-tiny-time-domain-transformer-network-for-speech-separation-2206.13689"/></url>
<url><loc>https://scifaro.com/en/abs/dummy-prototypical-networks-for-few-shot-open-set-keyword-spotting-2206.13691</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dummy-prototypical-networks-for-few-shot-open-set-keyword-spotting-2206.13691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dummy-prototypical-networks-for-few-shot-open-set-keyword-spotting-2206.13691"/></url>
<url><loc>https://scifaro.com/en/abs/domain-agnostic-few-shot-learning-for-speaker-verification-2206.13700</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-agnostic-few-shot-learning-for-speaker-verification-2206.13700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-agnostic-few-shot-learning-for-speaker-verification-2206.13700"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-keyword-spotting-through-multi-task-learning-2206.13708</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-keyword-spotting-through-multi-task-learning-2206.13708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-keyword-spotting-through-multi-task-learning-2206.13708"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-speech-representations-for-the-mos-prediction-system-2206.13817</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-speech-representations-for-the-mos-prediction-system-2206.13817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-speech-representations-for-the-mos-prediction-system-2206.13817"/></url>
<url><loc>https://scifaro.com/en/abs/qti-submission-to-dcase-2021-residual-normalization-for-device-imbalanced-acoustic-scene-classification-with-efficient-design-2206.13909</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qti-submission-to-dcase-2021-residual-normalization-for-device-imbalanced-acoustic-scene-classification-with-efficient-design-2206.13909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qti-submission-to-dcase-2021-residual-normalization-for-device-imbalanced-acoustic-scene-classification-with-efficient-design-2206.13909"/></url>
<url><loc>https://scifaro.com/en/abs/attack-agnostic-dataset-towards-generalization-and-stabilization-of-audio-deepfake-detection-2206.13979</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attack-agnostic-dataset-towards-generalization-and-stabilization-of-audio-deepfake-detection-2206.13979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attack-agnostic-dataset-towards-generalization-and-stabilization-of-audio-deepfake-detection-2206.13979"/></url>
<url><loc>https://scifaro.com/en/abs/language-based-audio-retrieval-with-converging-tied-layers-and-contrastive-loss-2206.14659</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-based-audio-retrieval-with-converging-tied-layers-and-contrastive-loss-2206.14659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-based-audio-retrieval-with-converging-tied-layers-and-contrastive-loss-2206.14659"/></url>
<url><loc>https://scifaro.com/en/abs/drumgan-vst-a-plugin-for-drum-sound-analysis-synthesis-with-autoencoding-generative-adversarial-networks-2206.14723</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drumgan-vst-a-plugin-for-drum-sound-analysis-synthesis-with-autoencoding-generative-adversarial-networks-2206.14723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drumgan-vst-a-plugin-for-drum-sound-analysis-synthesis-with-autoencoding-generative-adversarial-networks-2206.14723"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-melody-generation-from-lyrics-with-discrete-valued-adversarial-training-2206.15027</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-melody-generation-from-lyrics-with-discrete-valued-adversarial-training-2206.15027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-melody-generation-from-lyrics-with-discrete-valued-adversarial-training-2206.15027"/></url>
<url><loc>https://scifaro.com/en/abs/fearless-feature-refinement-loss-for-ensembling-self-supervised-learning-features-in-robust-end-to-end-speech-recognition-2206.15056</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fearless-feature-refinement-loss-for-ensembling-self-supervised-learning-features-in-robust-end-to-end-speech-recognition-2206.15056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fearless-feature-refinement-loss-for-ensembling-self-supervised-learning-features-in-robust-end-to-end-speech-recognition-2206.15056"/></url>
<url><loc>https://scifaro.com/en/abs/language-model-based-emotion-prediction-methods-for-emotional-speech-synthesis-systems-2206.15067</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-model-based-emotion-prediction-methods-for-emotional-speech-synthesis-systems-2206.15067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-model-based-emotion-prediction-methods-for-emotional-speech-synthesis-systems-2206.15067"/></url>
<url><loc>https://scifaro.com/en/abs/an-evaluation-of-three-stage-voice-conversion-framework-for-noisy-and-reverberant-conditions-2206.15155</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-evaluation-of-three-stage-voice-conversion-framework-for-noisy-and-reverberant-conditions-2206.15155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-evaluation-of-three-stage-voice-conversion-framework-for-noisy-and-reverberant-conditions-2206.15155"/></url>
<url><loc>https://scifaro.com/en/abs/libaca-pyaca-and-aca-code-audio-content-analysis-in-3-languages-2206.15219</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libaca-pyaca-and-aca-code-audio-content-analysis-in-3-languages-2206.15219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libaca-pyaca-and-aca-code-audio-content-analysis-in-3-languages-2206.15219"/></url>
<url><loc>https://scifaro.com/en/abs/r-melnet-reduced-mel-spectral-modeling-for-neural-tts-2206.15276</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/r-melnet-reduced-mel-spectral-modeling-for-neural-tts-2206.15276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/r-melnet-reduced-mel-spectral-modeling-for-neural-tts-2206.15276"/></url>
<url><loc>https://scifaro.com/en/abs/sonification-as-a-reliable-alternative-to-conventional-visual-surgical-navigation-2206.15291</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonification-as-a-reliable-alternative-to-conventional-visual-surgical-navigation-2206.15291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonification-as-a-reliable-alternative-to-conventional-visual-surgical-navigation-2206.15291"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-neural-spatial-filtering-for-multichannel-source-separation-in-the-waveform-domain-2206.15423</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-neural-spatial-filtering-for-multichannel-source-separation-in-the-waveform-domain-2206.15423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-neural-spatial-filtering-for-multichannel-source-separation-in-the-waveform-domain-2206.15423"/></url>
<url><loc>https://scifaro.com/en/abs/volume-independent-music-matching-by-frequency-spectrum-comparison-2206.15426</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/volume-independent-music-matching-by-frequency-spectrum-comparison-2206.15426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/volume-independent-music-matching-by-frequency-spectrum-comparison-2206.15426"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-enhancement-through-fine-grained-speech-characteristics-2207.00237</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-enhancement-through-fine-grained-speech-characteristics-2207.00237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-enhancement-through-fine-grained-speech-characteristics-2207.00237"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-evaluation-of-speaker-similarity-2207.00344</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-evaluation-of-speaker-similarity-2207.00344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-evaluation-of-speaker-similarity-2207.00344"/></url>
<url><loc>https://scifaro.com/en/abs/distance-based-sound-separation-2207.00562</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distance-based-sound-separation-2207.00562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distance-based-sound-separation-2207.00562"/></url>
<url><loc>https://scifaro.com/en/abs/learning-noise-independent-speech-representation-for-high-quality-voice-conversion-for-noisy-target-speakers-2207.00756</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-noise-independent-speech-representation-for-high-quality-voice-conversion-for-noisy-target-speakers-2207.00756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-noise-independent-speech-representation-for-high-quality-voice-conversion-for-noisy-target-speakers-2207.00756"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-symbolic-music-segmentation-using-ensemble-temporal-prediction-errors-2207.00760</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-symbolic-music-segmentation-using-ensemble-temporal-prediction-errors-2207.00760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-symbolic-music-segmentation-using-ensemble-temporal-prediction-errors-2207.00760"/></url>
<url><loc>https://scifaro.com/en/abs/tree-constrained-pointer-generator-with-graph-neural-network-encodings-for-contextual-speech-recognition-2207.00857</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tree-constrained-pointer-generator-with-graph-neural-network-encodings-for-contextual-speech-recognition-2207.00857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tree-constrained-pointer-generator-with-graph-neural-network-encodings-for-contextual-speech-recognition-2207.00857"/></url>
<url><loc>https://scifaro.com/en/abs/improving-transformer-based-conversational-asr-by-inter-sentential-attention-mechanism-2207.00883</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-transformer-based-conversational-asr-by-inter-sentential-attention-mechanism-2207.00883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-transformer-based-conversational-asr-by-inter-sentential-attention-mechanism-2207.00883"/></url>
<url><loc>https://scifaro.com/en/abs/towards-error-resilient-neural-speech-coding-2207.00993</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-error-resilient-neural-speech-coding-2207.00993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-error-resilient-neural-speech-coding-2207.00993"/></url>
<url><loc>https://scifaro.com/en/abs/generating-gender-ambiguous-voices-for-privacy-preserving-speech-recognition-2207.01052</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-gender-ambiguous-voices-for-privacy-preserving-speech-recognition-2207.01052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-gender-ambiguous-voices-for-privacy-preserving-speech-recognition-2207.01052"/></url>
<url><loc>https://scifaro.com/en/abs/araus-a-large-scale-dataset-and-baseline-models-of-affective-responses-to-augmented-urban-soundscapes-2207.01078</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/araus-a-large-scale-dataset-and-baseline-models-of-affective-responses-to-augmented-urban-soundscapes-2207.01078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/araus-a-large-scale-dataset-and-baseline-models-of-affective-responses-to-augmented-urban-soundscapes-2207.01078"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-multi-correlation-learning-for-audio-visual-speech-separation-2207.01197</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-multi-correlation-learning-for-audio-visual-speech-separation-2207.01197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-multi-correlation-learning-for-audio-visual-speech-separation-2207.01197"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-prosody-compensation-for-end-to-end-speech-synthesis-2207.01198</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-prosody-compensation-for-end-to-end-speech-synthesis-2207.01198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-prosody-compensation-for-end-to-end-speech-synthesis-2207.01198"/></url>
<url><loc>https://scifaro.com/en/abs/tmgan-plc-audio-packet-loss-concealment-using-temporal-memory-generative-adversarial-network-2207.01255</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tmgan-plc-audio-packet-loss-concealment-using-temporal-memory-generative-adversarial-network-2207.01255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tmgan-plc-audio-packet-loss-concealment-using-temporal-memory-generative-adversarial-network-2207.01255"/></url>
<url><loc>https://scifaro.com/en/abs/minimizing-sequential-confusion-error-in-speech-command-recognition-2207.01261</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimizing-sequential-confusion-error-in-speech-command-recognition-2207.01261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimizing-sequential-confusion-error-in-speech-command-recognition-2207.01261"/></url>
<url><loc>https://scifaro.com/en/abs/catt-kws-a-multi-stage-customized-keyword-spotting-framework-based-on-cascaded-transducer-transformer-2207.01267</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catt-kws-a-multi-stage-customized-keyword-spotting-framework-based-on-cascaded-transducer-transformer-2207.01267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catt-kws-a-multi-stage-customized-keyword-spotting-framework-based-on-cascaded-transducer-transformer-2207.01267"/></url>
<url><loc>https://scifaro.com/en/abs/stochastic-restoration-of-heavily-compressed-musical-audio-using-generative-adversarial-networks-2207.01667</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stochastic-restoration-of-heavily-compressed-musical-audio-using-generative-adversarial-networks-2207.01667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stochastic-restoration-of-heavily-compressed-musical-audio-using-generative-adversarial-networks-2207.01667"/></url>
<url><loc>https://scifaro.com/en/abs/an-adaptive-music-generation-architecture-for-games-based-on-the-deep-learning-transformer-mode-2207.01698</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adaptive-music-generation-architecture-for-games-based-on-the-deep-learning-transformer-mode-2207.01698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adaptive-music-generation-architecture-for-games-based-on-the-deep-learning-transformer-mode-2207.01698"/></url>
<url><loc>https://scifaro.com/en/abs/backend-ensemble-for-speaker-verification-and-spoofing-countermeasure-2207.01802</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/backend-ensemble-for-speaker-verification-and-spoofing-countermeasure-2207.01802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/backend-ensemble-for-speaker-verification-and-spoofing-countermeasure-2207.01802"/></url>
<url><loc>https://scifaro.com/en/abs/glow-wavegan-2-high-quality-zero-shot-text-to-speech-synthesis-and-any-to-any-voice-conversion-2207.01832</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glow-wavegan-2-high-quality-zero-shot-text-to-speech-synthesis-and-any-to-any-voice-conversion-2207.01832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glow-wavegan-2-high-quality-zero-shot-text-to-speech-synthesis-and-any-to-any-voice-conversion-2207.01832"/></url>
<url><loc>https://scifaro.com/en/abs/wesinger-2-fully-parallel-singing-voice-synthesis-via-multi-singer-conditional-adversarial-training-2207.01886</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wesinger-2-fully-parallel-singing-voice-synthesis-via-multi-singer-conditional-adversarial-training-2207.01886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wesinger-2-fully-parallel-singing-voice-synthesis-via-multi-singer-conditional-adversarial-training-2207.01886"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-low-bitrate-speech-coding-with-pretrained-transformers-2207.02262</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-low-bitrate-speech-coding-with-pretrained-transformers-2207.02262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-low-bitrate-speech-coding-with-pretrained-transformers-2207.02262"/></url>
<url><loc>https://scifaro.com/en/abs/cross-scale-vector-quantization-for-scalable-neural-speech-coding-2207.03067</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-scale-vector-quantization-for-scalable-neural-speech-coding-2207.03067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-scale-vector-quantization-for-scalable-neural-speech-coding-2207.03067"/></url>
<url><loc>https://scifaro.com/en/abs/visual-assisted-sound-source-depth-estimation-in-the-wild-2207.03074</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-assisted-sound-source-depth-estimation-in-the-wild-2207.03074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-assisted-sound-source-depth-estimation-in-the-wild-2207.03074"/></url>
<url><loc>https://scifaro.com/en/abs/learning-music-dance-representations-through-explicit-implicit-rhythm-synchronization-2207.03190</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-music-dance-representations-through-explicit-implicit-rhythm-synchronization-2207.03190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-music-dance-representations-through-explicit-implicit-rhythm-synchronization-2207.03190"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-binaural-speech-synthesis-2207.03697</loc><lastmod>2022-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-binaural-speech-synthesis-2207.03697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-binaural-speech-synthesis-2207.03697"/></url>
<url><loc>https://scifaro.com/en/abs/fastlts-non-autoregressive-end-to-end-unconstrained-lip-to-speech-synthesis-2207.03800</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastlts-non-autoregressive-end-to-end-unconstrained-lip-to-speech-synthesis-2207.03800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastlts-non-autoregressive-end-to-end-unconstrained-lip-to-speech-synthesis-2207.03800"/></url>
<url><loc>https://scifaro.com/en/abs/bast-binaural-audio-spectrogram-transformer-for-binaural-sound-localization-2207.03927</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bast-binaural-audio-spectrogram-transformer-for-binaural-sound-localization-2207.03927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bast-binaural-audio-spectrogram-transformer-for-binaural-sound-localization-2207.03927"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-tasking-model-of-speaker-keyword-classification-for-keeping-human-in-the-loop-of-drone-assisted-inspection-2207.04027</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-tasking-model-of-speaker-keyword-classification-for-keeping-human-in-the-loop-of-drone-assisted-inspection-2207.04027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-tasking-model-of-speaker-keyword-classification-for-keeping-human-in-the-loop-of-drone-assisted-inspection-2207.04027"/></url>
<url><loc>https://scifaro.com/en/abs/automated-audio-captioning-and-language-based-audio-retrieval-2207.04156</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-audio-captioning-and-language-based-audio-retrieval-2207.04156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-audio-captioning-and-language-based-audio-retrieval-2207.04156"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-separate-voices-by-spatial-regions-2207.04203</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-separate-voices-by-spatial-regions-2207.04203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-separate-voices-by-spatial-regions-2207.04203"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representation-based-voice-conversion-2207.04356</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representation-based-voice-conversion-2207.04356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representation-based-voice-conversion-2207.04356"/></url>
<url><loc>https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-with-weakly-labeled-data-2207.04357</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-with-weakly-labeled-data-2207.04357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-analysis-of-acoustic-scenes-and-sound-events-with-weakly-labeled-data-2207.04357"/></url>
<url><loc>https://scifaro.com/en/abs/towards-proper-contrastive-self-supervised-learning-strategies-for-music-audio-representation-2207.04471</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-proper-contrastive-self-supervised-learning-strategies-for-music-audio-representation-2207.04471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-proper-contrastive-self-supervised-learning-strategies-for-music-audio-representation-2207.04471"/></url>
<url><loc>https://scifaro.com/en/abs/delightfultts-2-end-to-end-speech-synthesis-with-adversarial-vector-quantized-auto-encoders-2207.04646</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delightfultts-2-end-to-end-speech-synthesis-with-adversarial-vector-quantized-auto-encoders-2207.04646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delightfultts-2-end-to-end-speech-synthesis-with-adversarial-vector-quantized-auto-encoders-2207.04646"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-consistency-loss-and-step-wise-optimization-for-semi-supervised-joint-training-of-tts-and-asr-using-unpaired-text-data-2207.04659</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-consistency-loss-and-step-wise-optimization-for-semi-supervised-joint-training-of-tts-and-asr-using-unpaired-text-data-2207.04659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-consistency-loss-and-step-wise-optimization-for-semi-supervised-joint-training-of-tts-and-asr-using-unpaired-text-data-2207.04659"/></url>
<url><loc>https://scifaro.com/en/abs/the-hccl-system-for-the-nist-sre21-2207.04676</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hccl-system-for-the-nist-sre21-2207.04676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hccl-system-for-the-nist-sre21-2207.04676"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymization-with-phonetic-intermediate-representations-2207.04834</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymization-with-phonetic-intermediate-representations-2207.04834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymization-with-phonetic-intermediate-representations-2207.04834"/></url>
<url><loc>https://scifaro.com/en/abs/indoor-optical-fiber-eavesdropping-approach-and-its-avoidance-2207.05267</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/indoor-optical-fiber-eavesdropping-approach-and-its-avoidance-2207.05267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/indoor-optical-fiber-eavesdropping-approach-and-its-avoidance-2207.05267"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-from-augmented-auxiliary-data-for-improving-speech-emotion-recognition-2207.05298</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-from-augmented-auxiliary-data-for-improving-speech-emotion-recognition-2207.05298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-from-augmented-auxiliary-data-for-improving-speech-emotion-recognition-2207.05298"/></url>
<url><loc>https://scifaro.com/en/abs/western-mediterranean-wetlands-bird-species-classification-evaluating-small-footprint-deep-learning-approaches-on-a-new-annotated-dataset-2207.05393</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/western-mediterranean-wetlands-bird-species-classification-evaluating-small-footprint-deep-learning-approaches-on-a-new-annotated-dataset-2207.05393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/western-mediterranean-wetlands-bird-species-classification-evaluating-small-footprint-deep-learning-approaches-on-a-new-annotated-dataset-2207.05393"/></url>
<url><loc>https://scifaro.com/en/abs/a-generative-deep-learning-approach-for-shape-recognition-of-arbitrary-objects-from-phaseless-acoustic-scattering-data-2207.05433</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generative-deep-learning-approach-for-shape-recognition-of-arbitrary-objects-from-phaseless-acoustic-scattering-data-2207.05433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generative-deep-learning-approach-for-shape-recognition-of-arbitrary-objects-from-phaseless-acoustic-scattering-data-2207.05433"/></url>
<url><loc>https://scifaro.com/en/abs/efficientleaf-a-faster-learnable-audio-frontend-of-questionable-use-2207.05508</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficientleaf-a-faster-learnable-audio-frontend-of-questionable-use-2207.05508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficientleaf-a-faster-learnable-audio-frontend-of-questionable-use-2207.05508"/></url>
<url><loc>https://scifaro.com/en/abs/relyme-improving-lyric-to-melody-generation-by-incorporating-lyric-melody-relationships-2207.05688</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relyme-improving-lyric-to-melody-generation-by-incorporating-lyric-melody-relationships-2207.05688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relyme-improving-lyric-to-melody-generation-by-incorporating-lyric-melody-relationships-2207.05688"/></url>
<url><loc>https://scifaro.com/en/abs/distilled-non-semantic-speech-embeddings-with-binary-neural-networks-for-low-resource-devices-2207.05784</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distilled-non-semantic-speech-embeddings-with-binary-neural-networks-for-low-resource-devices-2207.05784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distilled-non-semantic-speech-embeddings-with-binary-neural-networks-for-low-resource-devices-2207.05784"/></url>
<url><loc>https://scifaro.com/en/abs/nec-speaker-selective-cancellation-via-neural-enhanced-ultrasound-shadowing-2207.05848</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nec-speaker-selective-cancellation-via-neural-enhanced-ultrasound-shadowing-2207.05848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nec-speaker-selective-cancellation-via-neural-enhanced-ultrasound-shadowing-2207.05848"/></url>
<url><loc>https://scifaro.com/en/abs/visual-context-driven-audio-feature-enhancement-for-robust-end-to-end-audio-visual-speech-recognition-2207.06020</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-context-driven-audio-feature-enhancement-for-robust-end-to-end-audio-visual-speech-recognition-2207.06020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-context-driven-audio-feature-enhancement-for-robust-end-to-end-audio-visual-speech-recognition-2207.06020"/></url>
<url><loc>https://scifaro.com/en/abs/subband-based-generative-adversarial-network-for-non-parallel-many-to-many-voice-conversion-2207.06057</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subband-based-generative-adversarial-network-for-non-parallel-many-to-many-voice-conversion-2207.06057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subband-based-generative-adversarial-network-for-non-parallel-many-to-many-voice-conversion-2207.06057"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-and-lossless-non-autoregressive-end-to-end-text-to-speech-2207.06088</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-and-lossless-non-autoregressive-end-to-end-text-to-speech-2207.06088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-and-lossless-non-autoregressive-end-to-end-text-to-speech-2207.06088"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-detection-for-highly-dense-birdsong-scenes-2207.06349</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-for-highly-dense-birdsong-scenes-2207.06349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-for-highly-dense-birdsong-scenes-2207.06349"/></url>
<url><loc>https://scifaro.com/en/abs/masked-autoencoders-that-listen-2207.06405</loc><lastmod>2023-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-autoencoders-that-listen-2207.06405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-autoencoders-that-listen-2207.06405"/></url>
<url><loc>https://scifaro.com/en/abs/wakeword-detection-under-distribution-shifts-2207.06423</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wakeword-detection-under-distribution-shifts-2207.06423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wakeword-detection-under-distribution-shifts-2207.06423"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-cross-lingual-speech-emotion-recognition-2207.06767</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-cross-lingual-speech-emotion-recognition-2207.06767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-cross-lingual-speech-emotion-recognition-2207.06767"/></url>
<url><loc>https://scifaro.com/en/abs/rsd-gan-regularized-sobolev-defense-gan-against-speech-to-text-adversarial-attacks-2207.06858</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rsd-gan-regularized-sobolev-defense-gan-against-speech-to-text-adversarial-attacks-2207.06858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rsd-gan-regularized-sobolev-defense-gan-against-speech-to-text-adversarial-attacks-2207.06858"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-low-resource-quechua-asr-improvement-2207.06872</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-low-resource-quechua-asr-improvement-2207.06872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-low-resource-quechua-asr-improvement-2207.06872"/></url>
<url><loc>https://scifaro.com/en/abs/sub-8-bit-quantization-of-streaming-keyword-spotting-models-for-embedded-chipsets-2207.06920</loc><lastmod>2022-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-8-bit-quantization-of-streaming-keyword-spotting-models-for-embedded-chipsets-2207.06920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-8-bit-quantization-of-streaming-keyword-spotting-models-for-embedded-chipsets-2207.06920"/></url>
<url><loc>https://scifaro.com/en/abs/proceedings-of-the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2207.06958</loc><lastmod>2022-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proceedings-of-the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2207.06958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proceedings-of-the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2207.06958"/></url>
<url><loc>https://scifaro.com/en/abs/multitrack-music-transformer-2207.06983</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitrack-music-transformer-2207.06983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitrack-music-transformer-2207.06983"/></url>
<url><loc>https://scifaro.com/en/abs/audio-guided-album-cover-art-generation-with-genetic-algorithms-2207.07162</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-guided-album-cover-art-generation-with-genetic-algorithms-2207.07162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-guided-album-cover-art-generation-with-genetic-algorithms-2207.07162"/></url>
<url><loc>https://scifaro.com/en/abs/podcastmix-a-dataset-for-separating-music-and-speech-in-podcasts-2207.07403</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/podcastmix-a-dataset-for-separating-music-and-speech-in-podcasts-2207.07403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/podcastmix-a-dataset-for-separating-music-and-speech-in-podcasts-2207.07403"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-on-device-environmental-sound-classification-2207.07429</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-on-device-environmental-sound-classification-2207.07429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-on-device-environmental-sound-classification-2207.07429"/></url>
<url><loc>https://scifaro.com/en/abs/low-bit-shift-network-for-end-to-end-spoken-language-understanding-2207.07497</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-bit-shift-network-for-end-to-end-spoken-language-understanding-2207.07497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-bit-shift-network-for-end-to-end-spoken-language-understanding-2207.07497"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2022-challenge-2207.07911</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2022-challenge-2207.07911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2022-challenge-2207.07911"/></url>
<url><loc>https://scifaro.com/en/abs/visually-aware-acoustic-event-detection-using-heterogeneous-graphs-2207.07935</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-aware-acoustic-event-detection-using-heterogeneous-graphs-2207.07935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-aware-acoustic-event-detection-using-heterogeneous-graphs-2207.07935"/></url>
<url><loc>https://scifaro.com/en/abs/latent-domain-predictive-neural-speech-coding-2207.08363</loc><lastmod>2025-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-domain-predictive-neural-speech-coding-2207.08363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-domain-predictive-neural-speech-coding-2207.08363"/></url>
<url><loc>https://scifaro.com/en/abs/the-vocal-signature-of-social-anxiety-exploration-using-hypothesis-testing-and-machine-learning-approaches-2207.08534</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-vocal-signature-of-social-anxiety-exploration-using-hypothesis-testing-and-machine-learning-approaches-2207.08534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-vocal-signature-of-social-anxiety-exploration-using-hypothesis-testing-and-machine-learning-approaches-2207.08534"/></url>
<url><loc>https://scifaro.com/en/abs/style-transfer-of-audio-effects-with-differentiable-signal-processing-2207.08759</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/style-transfer-of-audio-effects-with-differentiable-signal-processing-2207.08759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/style-transfer-of-audio-effects-with-differentiable-signal-processing-2207.08759"/></url>
<url><loc>https://scifaro.com/en/abs/audio-input-generates-continuous-frames-to-synthesize-facial-video-using-generative-adiversarial-networks-2207.08813</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-input-generates-continuous-frames-to-synthesize-facial-video-using-generative-adiversarial-networks-2207.08813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-input-generates-continuous-frames-to-synthesize-facial-video-using-generative-adiversarial-networks-2207.08813"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-environmental-sound-representation-learning-2207.08825</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-environmental-sound-representation-learning-2207.08825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-environmental-sound-representation-learning-2207.08825"/></url>
<url><loc>https://scifaro.com/en/abs/realistic-sources-receivers-and-walls-improve-the-generalisability-of-virtually-supervised-blind-acoustic-parameter-estimators-2207.09133</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realistic-sources-receivers-and-walls-improve-the-generalisability-of-virtually-supervised-blind-acoustic-parameter-estimators-2207.09133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realistic-sources-receivers-and-walls-improve-the-generalisability-of-virtually-supervised-blind-acoustic-parameter-estimators-2207.09133"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-applied-to-classify-flow-induced-sound-parameters-from-simulated-human-voice-2207.09265</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-applied-to-classify-flow-induced-sound-parameters-from-simulated-human-voice-2207.09265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-applied-to-classify-flow-induced-sound-parameters-from-simulated-human-voice-2207.09265"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-detection-from-respiratory-sounds-with-hierarchical-spectrogram-transformers-2207.09529</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-detection-from-respiratory-sounds-with-hierarchical-spectrogram-transformers-2207.09529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-detection-from-respiratory-sounds-with-hierarchical-spectrogram-transformers-2207.09529"/></url>
<url><loc>https://scifaro.com/en/abs/diffsound-discrete-diffusion-model-for-text-to-sound-generation-2207.09983</loc><lastmod>2023-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsound-discrete-diffusion-model-for-text-to-sound-generation-2207.09983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsound-discrete-diffusion-model-for-text-to-sound-generation-2207.09983"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-recognition-2207.10006</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-recognition-2207.10006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-recognition-2207.10006"/></url>
<url><loc>https://scifaro.com/en/abs/audioscopev2-audio-visual-attention-architectures-for-calibrated-open-domain-on-screen-sound-separation-2207.10141</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioscopev2-audio-visual-attention-architectures-for-calibrated-open-domain-on-screen-sound-separation-2207.10141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioscopev2-audio-visual-attention-architectures-for-calibrated-open-domain-on-screen-sound-separation-2207.10141"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-aware-multi-task-learning-based-speech-separation-2207.10229</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-aware-multi-task-learning-based-speech-separation-2207.10229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-aware-multi-task-learning-based-speech-separation-2207.10229"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-waveform-prior-2207.10441</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-waveform-prior-2207.10441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-waveform-prior-2207.10441"/></url>
<url><loc>https://scifaro.com/en/abs/room-geometry-blind-inference-based-on-the-localization-of-real-sound-source-and-first-order-reflections-2207.10478</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-geometry-blind-inference-based-on-the-localization-of-real-sound-source-and-first-order-reflections-2207.10478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-geometry-blind-inference-based-on-the-localization-of-real-sound-source-and-first-order-reflections-2207.10478"/></url>
<url><loc>https://scifaro.com/en/abs/surrey-system-for-dcase-2022-task-5-few-shot-bioacoustic-event-detection-with-segment-level-metric-learning-2207.10547</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surrey-system-for-dcase-2022-task-5-few-shot-bioacoustic-event-detection-with-segment-level-metric-learning-2207.10547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surrey-system-for-dcase-2022-task-5-few-shot-bioacoustic-event-detection-with-segment-level-metric-learning-2207.10547"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-transfer-and-distillation-from-autoregressive-to-non-autoregressive-speech-recognition-2207.10600</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-transfer-and-distillation-from-autoregressive-to-non-autoregressive-speech-recognition-2207.10600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-transfer-and-distillation-from-autoregressive-to-non-autoregressive-speech-recognition-2207.10600"/></url>
<url><loc>https://scifaro.com/en/abs/a-proposal-for-foley-sound-synthesis-challenge-2207.10760</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-proposal-for-foley-sound-synthesis-challenge-2207.10760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-proposal-for-foley-sound-synthesis-challenge-2207.10760"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-and-self-supervised-learning-for-compare-2022-stuttering-sub-challenge-2207.10817</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-and-self-supervised-learning-for-compare-2022-stuttering-sub-challenge-2207.10817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-and-self-supervised-learning-for-compare-2022-stuttering-sub-challenge-2207.10817"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-convolutional-neural-network-with-bicubic-spline-interpolation-for-sound-field-estimation-2207.10937</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-convolutional-neural-network-with-bicubic-spline-interpolation-for-sound-field-estimation-2207.10937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-convolutional-neural-network-with-bicubic-spline-interpolation-for-sound-field-estimation-2207.10937"/></url>
<url><loc>https://scifaro.com/en/abs/head-related-transfer-function-interpolation-from-spatially-sparse-measurements-using-autoencoder-with-source-position-conditioning-2207.10967</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/head-related-transfer-function-interpolation-from-spatially-sparse-measurements-using-autoencoder-with-source-position-conditioning-2207.10967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/head-related-transfer-function-interpolation-from-spatially-sparse-measurements-using-autoencoder-with-source-position-conditioning-2207.10967"/></url>
<url><loc>https://scifaro.com/en/abs/inference-skipping-for-more-efficient-real-time-speech-enhancement-with-parallel-rnns-2207.11108</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inference-skipping-for-more-efficient-real-time-speech-enhancement-with-parallel-rnns-2207.11108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inference-skipping-for-more-efficient-real-time-speech-enhancement-with-parallel-rnns-2207.11108"/></url>
<url><loc>https://scifaro.com/en/abs/learning-unsupervised-hierarchies-of-audio-concepts-2207.11231</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-unsupervised-hierarchies-of-audio-concepts-2207.11231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-unsupervised-hierarchies-of-audio-concepts-2207.11231"/></url>
<url><loc>https://scifaro.com/en/abs/housex-a-fine-grained-house-music-dataset-and-its-potential-in-the-music-industry-2207.11690</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/housex-a-fine-grained-house-music-dataset-and-its-potential-in-the-music-industry-2207.11690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/housex-a-fine-grained-house-music-dataset-and-its-potential-in-the-music-industry-2207.11690"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-source-separation-of-unknown-numbers-of-single-channel-underwater-acoustic-signals-based-on-deep-neural-networks-with-separator-decoder-structure-2207.11749</loc><lastmod>2024-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-source-separation-of-unknown-numbers-of-single-channel-underwater-acoustic-signals-based-on-deep-neural-networks-with-separator-decoder-structure-2207.11749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-source-separation-of-unknown-numbers-of-single-channel-underwater-acoustic-signals-based-on-deep-neural-networks-with-separator-decoder-structure-2207.11749"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-contrastive-representation-learning-for-audio-to-image-generation-2207.12121</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-contrastive-representation-learning-for-audio-to-image-generation-2207.12121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-contrastive-representation-learning-for-audio-to-image-generation-2207.12121"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adapting-deep-reinforcement-learning-for-real-world-speech-emotion-recognition-2207.12248</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adapting-deep-reinforcement-learning-for-real-world-speech-emotion-recognition-2207.12248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adapting-deep-reinforcement-learning-for-real-world-speech-emotion-recognition-2207.12248"/></url>
<url><loc>https://scifaro.com/en/abs/cfad-a-chinese-dataset-for-fake-audio-detection-2207.12308</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cfad-a-chinese-dataset-for-fake-audio-detection-2207.12308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cfad-a-chinese-dataset-for-fake-audio-detection-2207.12308"/></url>
<url><loc>https://scifaro.com/en/abs/an-exhaustive-variable-selection-study-for-linear-models-of-soundscape-emotions-rankings-and-gibbs-analysis-2207.12743</loc><lastmod>2022-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exhaustive-variable-selection-study-for-linear-models-of-soundscape-emotions-rankings-and-gibbs-analysis-2207.12743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exhaustive-variable-selection-study-for-linear-models-of-soundscape-emotions-rankings-and-gibbs-analysis-2207.12743"/></url>
<url><loc>https://scifaro.com/en/abs/distinguishing-between-pre-and-post-treatment-in-the-speech-of-patients-with-chronic-obstructive-pulmonary-disease-2207.12784</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distinguishing-between-pre-and-post-treatment-in-the-speech-of-patients-with-chronic-obstructive-pulmonary-disease-2207.12784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distinguishing-between-pre-and-post-treatment-in-the-speech-of-patients-with-chronic-obstructive-pulmonary-disease-2207.12784"/></url>
<url><loc>https://scifaro.com/en/abs/perception-aware-attack-creating-adversarial-music-via-reverse-engineering-human-perception-2207.13192</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perception-aware-attack-creating-adversarial-music-via-reverse-engineering-human-perception-2207.13192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perception-aware-attack-creating-adversarial-music-via-reverse-engineering-human-perception-2207.13192"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-audiovisual-feature-fusion-for-active-speaker-detection-2207.13434</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-audiovisual-feature-fusion-for-active-speaker-detection-2207.13434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-audiovisual-feature-fusion-for-active-speaker-detection-2207.13434"/></url>
<url><loc>https://scifaro.com/en/abs/soundchoice-grapheme-to-phoneme-models-with-semantic-disambiguation-2207.13703</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundchoice-grapheme-to-phoneme-models-with-semantic-disambiguation-2207.13703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundchoice-grapheme-to-phoneme-models-with-semantic-disambiguation-2207.13703"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-acoustic-mosquito-detection-in-noisy-conditions-using-trainable-kernels-and-augmentations-2207.13843</loc><lastmod>2022-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-acoustic-mosquito-detection-in-noisy-conditions-using-trainable-kernels-and-augmentations-2207.13843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-acoustic-mosquito-detection-in-noisy-conditions-using-trainable-kernels-and-augmentations-2207.13843"/></url>
<url><loc>https://scifaro.com/en/abs/eeg2mel-reconstructing-sound-from-brain-responses-to-music-2207.13845</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg2mel-reconstructing-sound-from-brain-responses-to-music-2207.13845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg2mel-reconstructing-sound-from-brain-responses-to-music-2207.13845"/></url>
<url><loc>https://scifaro.com/en/abs/learning-phone-recognition-from-unpaired-audio-and-phone-sequences-based-on-generative-adversarial-network-2207.14568</loc><lastmod>2022-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-phone-recognition-from-unpaired-audio-and-phone-sequences-based-on-generative-adversarial-network-2207.14568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-phone-recognition-from-unpaired-audio-and-phone-sequences-based-on-generative-adversarial-network-2207.14568"/></url>
<url><loc>https://scifaro.com/en/abs/towards-unconstrained-audio-splicing-detection-and-localization-with-neural-networks-2207.14682</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-unconstrained-audio-splicing-detection-and-localization-with-neural-networks-2207.14682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-unconstrained-audio-splicing-detection-and-localization-with-neural-networks-2207.14682"/></url>
<url><loc>https://scifaro.com/en/abs/jazz-contrafact-detection-2208.00792</loc><lastmod>2022-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jazz-contrafact-detection-2208.00792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jazz-contrafact-detection-2208.00792"/></url>
<url><loc>https://scifaro.com/en/abs/samplematch-drum-sample-retrieval-by-musical-context-2208.01141</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samplematch-drum-sample-retrieval-by-musical-context-2208.01141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samplematch-drum-sample-retrieval-by-musical-context-2208.01141"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deepfake-detection-based-on-a-combination-of-f0-information-and-real-plus-imaginary-spectrogram-features-2208.01214</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deepfake-detection-based-on-a-combination-of-f0-information-and-real-plus-imaginary-spectrogram-features-2208.01214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deepfake-detection-based-on-a-combination-of-f0-information-and-real-plus-imaginary-spectrogram-features-2208.01214"/></url>
<url><loc>https://scifaro.com/en/abs/vq-t-rnn-transducers-using-vector-quantized-prediction-network-states-2208.01818</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vq-t-rnn-transducers-using-vector-quantized-prediction-network-states-2208.01818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vq-t-rnn-transducers-using-vector-quantized-prediction-network-states-2208.01818"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2208.01917</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2208.01917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2208.01917"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speaker-verification-using-dynamic-loss-gate-and-label-correction-2208.01928</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speaker-verification-using-dynamic-loss-gate-and-label-correction-2208.01928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speaker-verification-using-dynamic-loss-gate-and-label-correction-2208.01928"/></url>
<url><loc>https://scifaro.com/en/abs/the-sjtu-system-for-short-duration-speaker-verification-challenge-2021-2208.01933</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sjtu-system-for-short-duration-speaker-verification-challenge-2021-2208.01933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sjtu-system-for-short-duration-speaker-verification-challenge-2021-2208.01933"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-scene-classification-via-contrastive-event-object-alignment-and-semantic-based-fusion-2208.02086</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-scene-classification-via-contrastive-event-object-alignment-and-semantic-based-fusion-2208.02086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-scene-classification-via-contrastive-event-object-alignment-and-semantic-based-fusion-2208.02086"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-on-asr-systems-an-overview-2208.02250</loc><lastmod>2022-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-on-asr-systems-an-overview-2208.02250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-on-asr-systems-an-overview-2208.02250"/></url>
<url><loc>https://scifaro.com/en/abs/tokyo-kion-on-query-based-generative-sonification-of-atmospheric-data-2208.02494</loc><lastmod>2022-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tokyo-kion-on-query-based-generative-sonification-of-atmospheric-data-2208.02494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tokyo-kion-on-query-based-generative-sonification-of-atmospheric-data-2208.02494"/></url>
<url><loc>https://scifaro.com/en/abs/keyword-spotting-system-and-evaluation-of-pruning-and-quantization-methods-on-low-power-edge-microcontrollers-2208.02765</loc><lastmod>2022-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keyword-spotting-system-and-evaluation-of-pruning-and-quantization-methods-on-low-power-edge-microcontrollers-2208.02765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keyword-spotting-system-and-evaluation-of-pruning-and-quantization-methods-on-low-power-edge-microcontrollers-2208.02765"/></url>
<url><loc>https://scifaro.com/en/abs/deep-feature-learning-for-medical-acoustics-2208.03084</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-feature-learning-for-medical-acoustics-2208.03084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-feature-learning-for-medical-acoustics-2208.03084"/></url>
<url><loc>https://scifaro.com/en/abs/robust-acoustic-domain-identification-with-its-application-to-speaker-diarization-2208.03162</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-acoustic-domain-identification-with-its-application-to-speaker-diarization-2208.03162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-acoustic-domain-identification-with-its-application-to-speaker-diarization-2208.03162"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-you-can-hear-audio-identification-with-playable-prototypes-2208.03311</loc><lastmod>2022-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-you-can-hear-audio-identification-with-playable-prototypes-2208.03311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-you-can-hear-audio-identification-with-playable-prototypes-2208.03311"/></url>
<url><loc>https://scifaro.com/en/abs/variational-autoencoders-for-anomaly-detection-in-respiratory-sounds-2208.03326</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-autoencoders-for-anomaly-detection-in-respiratory-sounds-2208.03326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-autoencoders-for-anomaly-detection-in-respiratory-sounds-2208.03326"/></url>
<url><loc>https://scifaro.com/en/abs/chronological-self-training-for-real-time-speaker-diarization-2208.03393</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chronological-self-training-for-real-time-speaker-diarization-2208.03393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chronological-self-training-for-real-time-speaker-diarization-2208.03393"/></url>
<url><loc>https://scifaro.com/en/abs/tgavc-improving-autoencoder-voice-conversion-with-text-guided-and-adversarial-training-2208.04035</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tgavc-improving-autoencoder-voice-conversion-with-text-guided-and-adversarial-training-2208.04035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tgavc-improving-autoencoder-voice-conversion-with-text-guided-and-adversarial-training-2208.04035"/></url>
<url><loc>https://scifaro.com/en/abs/denoising-induction-motor-sounds-using-an-autoencoder-2208.04462</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoising-induction-motor-sounds-using-an-autoencoder-2208.04462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoising-induction-motor-sounds-using-an-autoencoder-2208.04462"/></url>
<url><loc>https://scifaro.com/en/abs/ddsp-based-singing-vocoders-a-new-subtractive-based-synthesizer-and-a-comprehensive-evaluation-2208.04756</loc><lastmod>2022-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddsp-based-singing-vocoders-a-new-subtractive-based-synthesizer-and-a-comprehensive-evaluation-2208.04756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddsp-based-singing-vocoders-a-new-subtractive-based-synthesizer-and-a-comprehensive-evaluation-2208.04756"/></url>
<url><loc>https://scifaro.com/en/abs/pure-data-and-inscore-animated-notation-for-new-music-2208.04877</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pure-data-and-inscore-animated-notation-for-new-music-2208.04877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pure-data-and-inscore-animated-notation-for-new-music-2208.04877"/></url>
<url><loc>https://scifaro.com/en/abs/mathematical-foundations-of-complex-tonality-2208.04974</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mathematical-foundations-of-complex-tonality-2208.04974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mathematical-foundations-of-complex-tonality-2208.04974"/></url>
<url><loc>https://scifaro.com/en/abs/generative-data-augmentation-guided-by-triplet-loss-for-speech-emotion-recognition-2208.04994</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-data-augmentation-guided-by-triplet-loss-for-speech-emotion-recognition-2208.04994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-data-augmentation-guided-by-triplet-loss-for-speech-emotion-recognition-2208.04994"/></url>
<url><loc>https://scifaro.com/en/abs/subjective-evaluation-of-deep-neural-network-based-speech-enhancement-systems-in-real-world-conditions-2208.05057</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subjective-evaluation-of-deep-neural-network-based-speech-enhancement-systems-in-real-world-conditions-2208.05057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subjective-evaluation-of-deep-neural-network-based-speech-enhancement-systems-in-real-world-conditions-2208.05057"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-perceived-emotion-in-symbolic-music-generation-with-monte-carlo-tree-search-2208.05162</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-perceived-emotion-in-symbolic-music-generation-with-monte-carlo-tree-search-2208.05162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-perceived-emotion-in-symbolic-music-generation-with-monte-carlo-tree-search-2208.05162"/></url>
<url><loc>https://scifaro.com/en/abs/towards-cross-speaker-reading-style-transfer-on-audiobook-dataset-2208.05359</loc><lastmod>2022-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-cross-speaker-reading-style-transfer-on-audiobook-dataset-2208.05359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-cross-speaker-reading-style-transfer-on-audiobook-dataset-2208.05359"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-loop-generation-with-neural-discrete-representations-2208.05605</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-loop-generation-with-neural-discrete-representations-2208.05605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-loop-generation-with-neural-discrete-representations-2208.05605"/></url>
<url><loc>https://scifaro.com/en/abs/re-creation-of-creations-a-new-paradigm-for-lyric-to-melody-generation-2208.05697</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/re-creation-of-creations-a-new-paradigm-for-lyric-to-melody-generation-2208.05697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/re-creation-of-creations-a-new-paradigm-for-lyric-to-melody-generation-2208.05697"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-on-selecting-audio-pre-trained-models-for-audio-captioning-2208.06127</loc><lastmod>2022-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-on-selecting-audio-pre-trained-models-for-audio-captioning-2208.06127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-on-selecting-audio-pre-trained-models-for-audio-captioning-2208.06127"/></url>
<url><loc>https://scifaro.com/en/abs/ddx7-differentiable-fm-synthesis-of-musical-instrument-sounds-2208.06169</loc><lastmod>2022-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddx7-differentiable-fm-synthesis-of-musical-instrument-sounds-2208.06169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddx7-differentiable-fm-synthesis-of-musical-instrument-sounds-2208.06169"/></url>
<url><loc>https://scifaro.com/en/abs/models-of-music-cognition-and-composition-2208.06878</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/models-of-music-cognition-and-composition-2208.06878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/models-of-music-cognition-and-composition-2208.06878"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-impact-of-emotions-on-target-speech-extraction-and-speech-separation-2208.07091</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-impact-of-emotions-on-target-speech-extraction-and-speech-separation-2208.07091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-impact-of-emotions-on-target-speech-extraction-and-speech-separation-2208.07091"/></url>
<url><loc>https://scifaro.com/en/abs/towards-parametric-speech-synthesis-using-gaussian-markov-model-of-spectral-envelope-and-wavelet-based-decomposition-of-f0-2208.07122</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-parametric-speech-synthesis-using-gaussian-markov-model-of-spectral-envelope-and-wavelet-based-decomposition-of-f0-2208.07122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-parametric-speech-synthesis-using-gaussian-markov-model-of-spectral-envelope-and-wavelet-based-decomposition-of-f0-2208.07122"/></url>
<url><loc>https://scifaro.com/en/abs/lcsm-a-lightweight-complex-spectral-mapping-framework-for-stereophonic-acoustic-echo-cancellation-2208.07277</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lcsm-a-lightweight-complex-spectral-mapping-framework-for-stereophonic-acoustic-echo-cancellation-2208.07277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lcsm-a-lightweight-complex-spectral-mapping-framework-for-stereophonic-acoustic-echo-cancellation-2208.07277"/></url>
<url><loc>https://scifaro.com/en/abs/how-should-we-evaluate-synthesized-environmental-sounds-2208.07679</loc><lastmod>2022-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-should-we-evaluate-synthesized-environmental-sounds-2208.07679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-should-we-evaluate-synthesized-environmental-sounds-2208.07679"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audio-perception-of-music-by-ai-picked-room-acoustics-2208.07994</loc><lastmod>2022-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audio-perception-of-music-by-ai-picked-room-acoustics-2208.07994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audio-perception-of-music-by-ai-picked-room-acoustics-2208.07994"/></url>
<url><loc>https://scifaro.com/en/abs/domestic-sound-event-detection-by-shift-consistency-mean-teacher-training-and-adversarial-domain-adaptation-2208.08131</loc><lastmod>2022-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domestic-sound-event-detection-by-shift-consistency-mean-teacher-training-and-adversarial-domain-adaptation-2208.08131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domestic-sound-event-detection-by-shift-consistency-mean-teacher-training-and-adversarial-domain-adaptation-2208.08131"/></url>
<url><loc>https://scifaro.com/en/abs/extract-fundamental-frequency-based-on-cnn-combined-with-pyin-2208.08354</loc><lastmod>2022-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extract-fundamental-frequency-based-on-cnn-combined-with-pyin-2208.08354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extract-fundamental-frequency-based-on-cnn-combined-with-pyin-2208.08354"/></url>
<url><loc>https://scifaro.com/en/abs/musika-fast-infinite-waveform-music-generation-2208.08706</loc><lastmod>2022-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musika-fast-infinite-waveform-music-generation-2208.08706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musika-fast-infinite-waveform-music-generation-2208.08706"/></url>
<url><loc>https://scifaro.com/en/abs/deploying-enhanced-speech-feature-decreased-audio-complaints-at-svt-play-vod-service-2208.08960</loc><lastmod>2022-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deploying-enhanced-speech-feature-decreased-audio-complaints-at-svt-play-vod-service-2208.08960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deploying-enhanced-speech-feature-decreased-audio-complaints-at-svt-play-vod-service-2208.08960"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-for-the-automatic-indexing-of-sound-effects-libraries-2208.09096</loc><lastmod>2022-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-for-the-automatic-indexing-of-sound-effects-libraries-2208.09096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-for-the-automatic-indexing-of-sound-effects-libraries-2208.09096"/></url>
<url><loc>https://scifaro.com/en/abs/3m-an-effective-multi-view-multi-granularity-and-multi-aspect-modeling-approach-to-english-pronunciation-assessment-2208.09110</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3m-an-effective-multi-view-multi-granularity-and-multi-aspect-modeling-approach-to-english-pronunciation-assessment-2208.09110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3m-an-effective-multi-view-multi-granularity-and-multi-aspect-modeling-approach-to-english-pronunciation-assessment-2208.09110"/></url>
<url><loc>https://scifaro.com/en/abs/improving-post-processing-of-audio-event-detectors-using-reinforcement-learning-2208.09201</loc><lastmod>2022-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-post-processing-of-audio-event-detectors-using-reinforcement-learning-2208.09201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-post-processing-of-audio-event-detectors-using-reinforcement-learning-2208.09201"/></url>
<url><loc>https://scifaro.com/en/abs/fully-automated-end-to-end-fake-audio-detection-2208.09618</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-automated-end-to-end-fake-audio-detection-2208.09618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-automated-end-to-end-fake-audio-detection-2208.09618"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-investigation-for-detecting-vocoder-fingerprints-of-fake-audio-2208.09646</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-investigation-for-detecting-vocoder-fingerprints-of-fake-audio-2208.09646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-investigation-for-detecting-vocoder-fingerprints-of-fake-audio-2208.09646"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-with-graph-neural-networks-for-speech-emotion-recognition-2208.09830</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-with-graph-neural-networks-for-speech-emotion-recognition-2208.09830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-with-graph-neural-networks-for-speech-emotion-recognition-2208.09830"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-attention-transfer-for-efficient-speech-enhancement-2208.10367</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-attention-transfer-for-efficient-speech-enhancement-2208.10367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-attention-transfer-for-efficient-speech-enhancement-2208.10367"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deepfake-attribution-an-initial-dataset-and-investigation-2208.10489</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deepfake-attribution-an-initial-dataset-and-investigation-2208.10489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deepfake-attribution-an-initial-dataset-and-investigation-2208.10489"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-emotion-recognition-through-focus-and-calibration-attention-mechanisms-2208.10491</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-through-focus-and-calibration-attention-mechanisms-2208.10491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-through-focus-and-calibration-attention-mechanisms-2208.10491"/></url>
<url><loc>https://scifaro.com/en/abs/are-disentangled-representations-all-you-need-to-build-speaker-anonymization-systems-2208.10497</loc><lastmod>2023-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-disentangled-representations-all-you-need-to-build-speaker-anonymization-systems-2208.10497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-disentangled-representations-all-you-need-to-build-speaker-anonymization-systems-2208.10497"/></url>
<url><loc>https://scifaro.com/en/abs/concurrent-validity-of-automatic-speech-and-pause-measures-during-passage-reading-in-als-2208.10597</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concurrent-validity-of-automatic-speech-and-pause-measures-during-passage-reading-in-als-2208.10597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concurrent-validity-of-automatic-speech-and-pause-measures-during-passage-reading-in-als-2208.10597"/></url>
<url><loc>https://scifaro.com/en/abs/fall-detection-from-audios-with-audio-transformers-2208.10659</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fall-detection-from-audios-with-audio-transformers-2208.10659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fall-detection-from-audios-with-audio-transformers-2208.10659"/></url>
<url><loc>https://scifaro.com/en/abs/deep-model-with-built-in-cross-attention-alignment-for-acoustic-echo-cancellation-2208.11308</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-model-with-built-in-cross-attention-alignment-for-acoustic-echo-cancellation-2208.11308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-model-with-built-in-cross-attention-alignment-for-acoustic-echo-cancellation-2208.11308"/></url>
<url><loc>https://scifaro.com/en/abs/improved-zero-shot-audio-tagging-classification-with-patchout-spectrogram-transformers-2208.11402</loc><lastmod>2022-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-zero-shot-audio-tagging-classification-with-patchout-spectrogram-transformers-2208.11402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-zero-shot-audio-tagging-classification-with-patchout-spectrogram-transformers-2208.11402"/></url>
<url><loc>https://scifaro.com/en/abs/improving-natural-language-based-audio-retrieval-with-transfer-learning-and-audio-text-augmentations-2208.11460</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-natural-language-based-audio-retrieval-with-transfer-learning-and-audio-text-augmentations-2208.11460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-natural-language-based-audio-retrieval-with-transfer-learning-and-audio-text-augmentations-2208.11460"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-song-lyrics-with-an-audio-informed-pre-trained-language-model-2208.11671</loc><lastmod>2022-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-song-lyrics-with-an-audio-informed-pre-trained-language-model-2208.11671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-song-lyrics-with-an-audio-informed-pre-trained-language-model-2208.11671"/></url>
<url><loc>https://scifaro.com/en/abs/digital-audio-tampering-detection-based-on-enf-spatio-temporal-features-representation-learning-2208.11920</loc><lastmod>2022-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-audio-tampering-detection-based-on-enf-spatio-temporal-features-representation-learning-2208.11920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-audio-tampering-detection-based-on-enf-spatio-temporal-features-representation-learning-2208.11920"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-broadcast-networks-for-music-genre-classification-2208.12086</loc><lastmod>2022-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-broadcast-networks-for-music-genre-classification-2208.12086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-broadcast-networks-for-music-genre-classification-2208.12086"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-audio-language-learning-for-music-2208.12208</loc><lastmod>2022-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-audio-language-learning-for-music-2208.12208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-audio-language-learning-for-music-2208.12208"/></url>
<url><loc>https://scifaro.com/en/abs/music-separation-enhancement-with-generative-modeling-2208.12387</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-separation-enhancement-with-generative-modeling-2208.12387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-separation-enhancement-with-generative-modeling-2208.12387"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-symmetrical-convolutional-transformer-networks-for-speech-to-singing-voice-style-transfer-2208.12410</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-symmetrical-convolutional-transformer-networks-for-speech-to-singing-voice-style-transfer-2208.12410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-symmetrical-convolutional-transformer-networks-for-speech-to-singing-voice-style-transfer-2208.12410"/></url>
<url><loc>https://scifaro.com/en/abs/concept-based-techniques-for-musicologist-friendly-explanations-in-a-deep-music-classifier-2208.12485</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concept-based-techniques-for-musicologist-friendly-explanations-in-a-deep-music-classifier-2208.12485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concept-based-techniques-for-musicologist-friendly-explanations-in-a-deep-music-classifier-2208.12485"/></url>
<url><loc>https://scifaro.com/en/abs/spatio-temporal-representation-learning-enhanced-source-cell-phone-recognition-from-speech-recordings-2208.12753</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatio-temporal-representation-learning-enhanced-source-cell-phone-recognition-from-speech-recordings-2208.12753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatio-temporal-representation-learning-enhanced-source-cell-phone-recognition-from-speech-recordings-2208.12753"/></url>
<url><loc>https://scifaro.com/en/abs/mel-spectrogram-inversion-with-stable-pitch-2208.12782</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-spectrogram-inversion-with-stable-pitch-2208.12782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-spectrogram-inversion-with-stable-pitch-2208.12782"/></url>
<url><loc>https://scifaro.com/en/abs/sa-sliding-attack-for-synthetic-speech-detection-with-resistance-to-clipping-and-self-splicing-2208.13066</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-sliding-attack-for-synthetic-speech-detection-with-resistance-to-clipping-and-self-splicing-2208.13066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-sliding-attack-for-synthetic-speech-detection-with-resistance-to-clipping-and-self-splicing-2208.13066"/></url>
<url><loc>https://scifaro.com/en/abs/training-text-to-speech-systems-from-synthetic-data-a-practical-approach-for-accent-transfer-tasks-2208.13183</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-text-to-speech-systems-from-synthetic-data-a-practical-approach-for-accent-transfer-tasks-2208.13183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-text-to-speech-systems-from-synthetic-data-a-practical-approach-for-accent-transfer-tasks-2208.13183"/></url>
<url><loc>https://scifaro.com/en/abs/towards-disentangled-speech-representations-2208.13191</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-disentangled-speech-representations-2208.13191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-disentangled-speech-representations-2208.13191"/></url>
<url><loc>https://scifaro.com/en/abs/computing-with-hypervectors-for-efficient-speaker-identification-2208.13285</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computing-with-hypervectors-for-efficient-speaker-identification-2208.13285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computing-with-hypervectors-for-efficient-speaker-identification-2208.13285"/></url>
<url><loc>https://scifaro.com/en/abs/gridless-3d-recovery-of-image-sources-from-room-impulse-responses-2208.14017</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gridless-3d-recovery-of-image-sources-from-room-impulse-responses-2208.14017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gridless-3d-recovery-of-image-sources-from-room-impulse-responses-2208.14017"/></url>
<url><loc>https://scifaro.com/en/abs/hppnet-modeling-the-harmonic-structure-and-pitch-invariance-in-piano-transcription-2208.14339</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hppnet-modeling-the-harmonic-structure-and-pitch-invariance-in-piano-transcription-2208.14339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hppnet-modeling-the-harmonic-structure-and-pitch-invariance-in-piano-transcription-2208.14339"/></url>
<url><loc>https://scifaro.com/en/abs/meloform-generating-melody-with-musical-form-based-on-expert-systems-and-neural-networks-2208.14345</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meloform-generating-melody-with-musical-form-based-on-expert-systems-and-neural-networks-2208.14345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meloform-generating-melody-with-musical-form-based-on-expert-systems-and-neural-networks-2208.14345"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-music-source-separation-on-loud-commercial-music-2208.14355</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-music-source-separation-on-loud-commercial-music-2208.14355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-music-source-separation-on-loud-commercial-music-2208.14355"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-tempo-and-meter-tracking-system-for-rhythmic-improvis-2208.14717</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-tempo-and-meter-tracking-system-for-rhythmic-improvis-2208.14717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-tempo-and-meter-tracking-system-for-rhythmic-improvis-2208.14717"/></url>
<url><loc>https://scifaro.com/en/abs/open-challenges-in-musical-metacreation-2208.14734</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-challenges-in-musical-metacreation-2208.14734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-challenges-in-musical-metacreation-2208.14734"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-corpus-for-computational-music-research-and-a-novel-method-for-musical-structure-analysis-2208.14747</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-corpus-for-computational-music-research-and-a-novel-method-for-musical-structure-analysis-2208.14747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-corpus-for-computational-music-research-and-a-novel-method-for-musical-structure-analysis-2208.14747"/></url>
<url><loc>https://scifaro.com/en/abs/harmonization-and-evaluation-tweaking-the-parameters-on-human-listeners-2208.14750</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonization-and-evaluation-tweaking-the-parameters-on-human-listeners-2208.14750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonization-and-evaluation-tweaking-the-parameters-on-human-listeners-2208.14750"/></url>
<url><loc>https://scifaro.com/en/abs/domain-shift-oriented-machine-anomalous-sound-detection-model-based-on-self-supervised-learning-2208.14812</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-shift-oriented-machine-anomalous-sound-detection-model-based-on-self-supervised-learning-2208.14812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-shift-oriented-machine-anomalous-sound-detection-model-based-on-self-supervised-learning-2208.14812"/></url>
<url><loc>https://scifaro.com/en/abs/cadence-detection-in-symbolic-classical-music-using-graph-neural-networks-2208.14819</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cadence-detection-in-symbolic-classical-music-using-graph-neural-networks-2208.14819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cadence-detection-in-symbolic-classical-music-using-graph-neural-networks-2208.14819"/></url>
<url><loc>https://scifaro.com/en/abs/sketching-the-expression-flexible-rendering-of-expressive-piano-performance-with-self-supervised-learning-2208.14867</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sketching-the-expression-flexible-rendering-of-expressive-piano-performance-with-self-supervised-learning-2208.14867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sketching-the-expression-flexible-rendering-of-expressive-piano-performance-with-self-supervised-learning-2208.14867"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-generative-audio-systems-and-their-metrics-2209.00130</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-generative-audio-systems-and-their-metrics-2209.00130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-generative-audio-systems-and-their-metrics-2209.00130"/></url>
<url><loc>https://scifaro.com/en/abs/what-is-missing-in-deep-music-generation-a-study-of-repetition-and-structure-in-popular-music-2209.00182</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-is-missing-in-deep-music-generation-a-study-of-repetition-and-structure-in-popular-music-2209.00182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-is-missing-in-deep-music-generation-a-study-of-repetition-and-structure-in-popular-music-2209.00182"/></url>
<url><loc>https://scifaro.com/en/abs/generating-coherent-drum-accompaniment-with-fills-and-improvisations-2209.00291</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-coherent-drum-accompaniment-with-fills-and-improvisations-2209.00291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-coherent-drum-accompaniment-with-fills-and-improvisations-2209.00291"/></url>
<url><loc>https://scifaro.com/en/abs/accomontage2-a-complete-harmonization-and-accompaniment-arrangement-system-2209.00353</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accomontage2-a-complete-harmonization-and-accompaniment-arrangement-system-2209.00353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accomontage2-a-complete-harmonization-and-accompaniment-arrangement-system-2209.00353"/></url>
<url><loc>https://scifaro.com/en/abs/identify-the-beehive-sound-using-deep-learning-2209.01374</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identify-the-beehive-sound-using-deep-learning-2209.01374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identify-the-beehive-sound-using-deep-learning-2209.01374"/></url>
<url><loc>https://scifaro.com/en/abs/equivariant-self-supervision-for-musical-tempo-estimation-2209.01478</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/equivariant-self-supervision-for-musical-tempo-estimation-2209.01478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/equivariant-self-supervision-for-musical-tempo-estimation-2209.01478"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-pre-trained-feature-networks-for-generative-adversarial-networks-in-audio-domain-loop-generation-2209.01751</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-pre-trained-feature-networks-for-generative-adversarial-networks-in-audio-domain-loop-generation-2209.01751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-pre-trained-feature-networks-for-generative-adversarial-networks-in-audio-domain-loop-generation-2209.01751"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-music-and-text-with-crowdsourced-music-comments-a-sequence-to-sequence-framework-for-thematic-music-comments-generation-2209.01996</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-music-and-text-with-crowdsourced-music-comments-a-sequence-to-sequence-framework-for-thematic-music-comments-generation-2209.01996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-music-and-text-with-crowdsourced-music-comments-a-sequence-to-sequence-framework-for-thematic-music-comments-generation-2209.01996"/></url>
<url><loc>https://scifaro.com/en/abs/instrument-separation-of-symbolic-music-by-explicitly-guided-diffusion-model-2209.02696</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instrument-separation-of-symbolic-music-by-explicitly-guided-diffusion-model-2209.02696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instrument-separation-of-symbolic-music-by-explicitly-guided-diffusion-model-2209.02696"/></url>
<url><loc>https://scifaro.com/en/abs/read-it-to-me-an-emotionally-aware-speech-narration-application-2209.02785</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/read-it-to-me-an-emotionally-aware-speech-narration-application-2209.02785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/read-it-to-me-an-emotionally-aware-speech-narration-application-2209.02785"/></url>
<url><loc>https://scifaro.com/en/abs/the-role-of-vocal-persona-in-natural-and-synthesized-speech-2209.02855</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-role-of-vocal-persona-in-natural-and-synthesized-speech-2209.02855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-role-of-vocal-persona-in-natural-and-synthesized-speech-2209.02855"/></url>
<url><loc>https://scifaro.com/en/abs/improving-choral-music-separation-through-expressive-synthesized-data-from-sampled-instruments-2209.02871</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-choral-music-separation-through-expressive-synthesized-data-from-sampled-instruments-2209.02871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-choral-music-separation-through-expressive-synthesized-data-from-sampled-instruments-2209.02871"/></url>
<url><loc>https://scifaro.com/en/abs/audiolm-a-language-modeling-approach-to-audio-generation-2209.03143</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiolm-a-language-modeling-approach-to-audio-generation-2209.03143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiolm-a-language-modeling-approach-to-audio-generation-2209.03143"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-speech-enhancement-using-burst-propagation-2209.03275</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-speech-enhancement-using-burst-propagation-2209.03275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-speech-enhancement-using-burst-propagation-2209.03275"/></url>
<url><loc>https://scifaro.com/en/abs/what-did-i-just-hear-detecting-pornographic-sounds-in-adult-videos-using-neural-networks-2209.03711</loc><lastmod>2022-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-did-i-just-hear-detecting-pornographic-sounds-in-adult-videos-using-neural-networks-2209.03711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-did-i-just-hear-detecting-pornographic-sounds-in-adult-videos-using-neural-networks-2209.03711"/></url>
<url><loc>https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-the-detection-of-covid-19-from-crowd-sourced-respiratory-voice-data-2209.03727</loc><lastmod>2022-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-the-detection-of-covid-19-from-crowd-sourced-respiratory-voice-data-2209.03727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-the-detection-of-covid-19-from-crowd-sourced-respiratory-voice-data-2209.03727"/></url>
<url><loc>https://scifaro.com/en/abs/hardware-accelerator-and-neural-network-co-optimization-for-ultra-low-power-audio-processing-devices-2209.03807</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hardware-accelerator-and-neural-network-co-optimization-for-ultra-low-power-audio-processing-devices-2209.03807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hardware-accelerator-and-neural-network-co-optimization-for-ultra-low-power-audio-processing-devices-2209.03807"/></url>
<url><loc>https://scifaro.com/en/abs/dyadic-interaction-assessment-from-free-living-audio-for-depression-severity-assessment-2209.03901</loc><lastmod>2022-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dyadic-interaction-assessment-from-free-living-audio-for-depression-severity-assessment-2209.03901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dyadic-interaction-assessment-from-free-living-audio-for-depression-severity-assessment-2209.03901"/></url>
<url><loc>https://scifaro.com/en/abs/tf-gridnet-making-time-frequency-domain-models-great-again-for-monaural-speaker-separation-2209.03952</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tf-gridnet-making-time-frequency-domain-models-great-again-for-monaural-speaker-separation-2209.03952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tf-gridnet-making-time-frequency-domain-models-great-again-for-monaural-speaker-separation-2209.03952"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-environmental-perception-of-autonomous-vehicles-using-deep-learning-based-audio-classification-2209.04075</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-environmental-perception-of-autonomous-vehicles-using-deep-learning-based-audio-classification-2209.04075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-environmental-perception-of-autonomous-vehicles-using-deep-learning-based-audio-classification-2209.04075"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-method-of-soundscape-impressions-using-environmental-sounds-and-aerial-photographs-2209.04077</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-method-of-soundscape-impressions-using-environmental-sounds-and-aerial-photographs-2209.04077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-method-of-soundscape-impressions-using-environmental-sounds-and-aerial-photographs-2209.04077"/></url>
<url><loc>https://scifaro.com/en/abs/matt-a-multiple-instance-attention-mechanism-for-long-tail-music-genre-classification-2209.04109</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matt-a-multiple-instance-attention-mechanism-for-long-tail-music-genre-classification-2209.04109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matt-a-multiple-instance-attention-mechanism-for-long-tail-music-genre-classification-2209.04109"/></url>
<url><loc>https://scifaro.com/en/abs/overlapped-speech-and-gender-detection-with-wavlm-pre-trained-features-2209.04167</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overlapped-speech-and-gender-detection-with-wavlm-pre-trained-features-2209.04167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overlapped-speech-and-gender-detection-with-wavlm-pre-trained-features-2209.04167"/></url>
<url><loc>https://scifaro.com/en/abs/a-semi-supervised-algorithm-for-improving-the-consistency-of-crowdsourced-datasets-the-covid-19-case-study-on-respiratory-disorder-classification-2209.04360</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-semi-supervised-algorithm-for-improving-the-consistency-of-crowdsourced-datasets-the-covid-19-case-study-on-respiratory-disorder-classification-2209.04360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-semi-supervised-algorithm-for-improving-the-consistency-of-crowdsourced-datasets-the-covid-19-case-study-on-respiratory-disorder-classification-2209.04360"/></url>
<url><loc>https://scifaro.com/en/abs/deid-vc-speaker-de-identification-via-zero-shot-pseudo-voice-conversion-2209.04530</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deid-vc-speaker-de-identification-via-zero-shot-pseudo-voice-conversion-2209.04530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deid-vc-speaker-de-identification-via-zero-shot-pseudo-voice-conversion-2209.04530"/></url>
<url><loc>https://scifaro.com/en/abs/pay-attention-to-hard-trials-2209.04687</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pay-attention-to-hard-trials-2209.04687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pay-attention-to-hard-trials-2209.04687"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-signal-representations-for-joint-sound-event-detection-and-acoustic-scene-classification-2209.05900</loc><lastmod>2022-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-signal-representations-for-joint-sound-event-detection-and-acoustic-scene-classification-2209.05900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-signal-representations-for-joint-sound-event-detection-and-acoustic-scene-classification-2209.05900"/></url>
<url><loc>https://scifaro.com/en/abs/songdriver-real-time-music-accompaniment-generation-without-logical-latency-nor-exposure-bias-2209.06054</loc><lastmod>2022-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songdriver-real-time-music-accompaniment-generation-without-logical-latency-nor-exposure-bias-2209.06054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songdriver-real-time-music-accompaniment-generation-without-logical-latency-nor-exposure-bias-2209.06054"/></url>
<url><loc>https://scifaro.com/en/abs/using-rater-and-system-metadata-to-explain-variance-in-the-voicemos-challenge-2022-dataset-2209.06358</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-rater-and-system-metadata-to-explain-variance-in-the-voicemos-challenge-2022-dataset-2209.06358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-rater-and-system-metadata-to-explain-variance-in-the-voicemos-challenge-2022-dataset-2209.06358"/></url>
<url><loc>https://scifaro.com/en/abs/i2cr-improving-noise-robustness-on-keyword-spotting-using-inter-intra-contrastive-regularization-2209.06360</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i2cr-improving-noise-robustness-on-keyword-spotting-using-inter-intra-contrastive-regularization-2209.06360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i2cr-improving-noise-robustness-on-keyword-spotting-using-inter-intra-contrastive-regularization-2209.06360"/></url>
<url><loc>https://scifaro.com/en/abs/convnext-based-neural-network-for-audio-anti-spoofing-2209.06434</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convnext-based-neural-network-for-audio-anti-spoofing-2209.06434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convnext-based-neural-network-for-audio-anti-spoofing-2209.06434"/></url>
<url><loc>https://scifaro.com/en/abs/paratts-learning-linguistic-and-prosodic-cross-sentence-information-in-paragraph-based-tts-2209.06484</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paratts-learning-linguistic-and-prosodic-cross-sentence-information-in-paragraph-based-tts-2209.06484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paratts-learning-linguistic-and-prosodic-cross-sentence-information-in-paragraph-based-tts-2209.06484"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-voice-conversion-for-asr-augmentation-2209.06987</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-for-asr-augmentation-2209.06987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-for-asr-augmentation-2209.06987"/></url>
<url><loc>https://scifaro.com/en/abs/beat-transformer-demixed-beat-and-downbeat-tracking-with-dilated-self-attention-2209.07140</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beat-transformer-demixed-beat-and-downbeat-tracking-with-dilated-self-attention-2209.07140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beat-transformer-demixed-beat-and-downbeat-tracking-with-dilated-self-attention-2209.07140"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adversarial-training-on-conditional-variational-auto-encoder-for-controllable-music-generation-2209.07144</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adversarial-training-on-conditional-variational-auto-encoder-for-controllable-music-generation-2209.07144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adversarial-training-on-conditional-variational-auto-encoder-for-controllable-music-generation-2209.07144"/></url>
<url><loc>https://scifaro.com/en/abs/mvnet-memory-assistance-and-vocal-reinforcement-network-for-speech-enhancement-2209.07302</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mvnet-memory-assistance-and-vocal-reinforcement-network-for-speech-enhancement-2209.07302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mvnet-memory-assistance-and-vocal-reinforcement-network-for-speech-enhancement-2209.07302"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-attention-networks-and-uncertainty-loss-weighting-for-multi-task-emotion-recognition-on-vocal-bursts-2209.07384</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-attention-networks-and-uncertainty-loss-weighting-for-multi-task-emotion-recognition-on-vocal-bursts-2209.07384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-attention-networks-and-uncertainty-loss-weighting-for-multi-task-emotion-recognition-on-vocal-bursts-2209.07384"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-synthetic-speech-manipulation-in-real-audio-recordings-2209.07498</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-synthetic-speech-manipulation-in-real-audio-recordings-2209.07498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-synthetic-speech-manipulation-in-real-audio-recordings-2209.07498"/></url>
<url><loc>https://scifaro.com/en/abs/self-relation-attention-and-temporal-awareness-for-emotion-recognition-via-vocal-burst-2209.07629</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-relation-attention-and-temporal-awareness-for-emotion-recognition-via-vocal-burst-2209.07629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-relation-attention-and-temporal-awareness-for-emotion-recognition-via-vocal-burst-2209.07629"/></url>
<url><loc>https://scifaro.com/en/abs/musicaiz-a-python-library-for-symbolic-music-generation-analysis-and-visualization-2209.07974</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicaiz-a-python-library-for-symbolic-music-generation-analysis-and-visualization-2209.07974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicaiz-a-python-library-for-symbolic-music-generation-analysis-and-visualization-2209.07974"/></url>
<url><loc>https://scifaro.com/en/abs/compose-embellish-well-structured-piano-performance-generation-via-a-two-stage-approach-2209.08212</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compose-embellish-well-structured-piano-performance-generation-via-a-two-stage-approach-2209.08212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compose-embellish-well-structured-piano-performance-generation-via-a-two-stage-approach-2209.08212"/></url>
<url><loc>https://scifaro.com/en/abs/playing-technique-detection-by-fusing-note-onset-information-in-guzheng-performance-2209.08774</loc><lastmod>2022-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/playing-technique-detection-by-fusing-note-onset-information-in-guzheng-performance-2209.08774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/playing-technique-detection-by-fusing-note-onset-information-in-guzheng-performance-2209.08774"/></url>
<url><loc>https://scifaro.com/en/abs/the-royalflush-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09010</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-royalflush-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-royalflush-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09010"/></url>
<url><loc>https://scifaro.com/en/abs/sjtu-aispeech-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09076</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sjtu-aispeech-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sjtu-aispeech-system-for-voxceleb-speaker-recognition-challenge-2022-2209.09076"/></url>
<url><loc>https://scifaro.com/en/abs/a-closer-look-at-weakly-supervised-audio-visual-source-localization-2209.09634</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-closer-look-at-weakly-supervised-audio-visual-source-localization-2209.09634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-closer-look-at-weakly-supervised-audio-visual-source-localization-2209.09634"/></url>
<url><loc>https://scifaro.com/en/abs/the-bucea-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.09635</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-bucea-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.09635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-bucea-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.09635"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-for-adaptive-filters-with-higher-order-frequency-dependencies-2209.09955</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-for-adaptive-filters-with-higher-order-frequency-dependencies-2209.09955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-for-adaptive-filters-with-higher-order-frequency-dependencies-2209.09955"/></url>
<url><loc>https://scifaro.com/en/abs/setting-the-rhythm-scene-deep-learning-based-drum-loop-generation-from-arbitrary-language-cues-2209.10016</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/setting-the-rhythm-scene-deep-learning-based-drum-loop-generation-from-arbitrary-language-cues-2209.10016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/setting-the-rhythm-scene-deep-learning-based-drum-loop-generation-from-arbitrary-language-cues-2209.10016"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-time-alignment-of-dimensional-annotations-of-emotion-using-recurrent-neural-networks-2209.10223</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-time-alignment-of-dimensional-annotations-of-emotion-using-recurrent-neural-networks-2209.10223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-time-alignment-of-dimensional-annotations-of-emotion-using-recurrent-neural-networks-2209.10223"/></url>
<url><loc>https://scifaro.com/en/abs/learning-hierarchical-metrical-structure-beyond-measures-2209.10259</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-hierarchical-metrical-structure-beyond-measures-2209.10259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-hierarchical-metrical-structure-beyond-measures-2209.10259"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-perceptual-loudness-of-piano-tone-theory-and-applications-2209.10674</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-perceptual-loudness-of-piano-tone-theory-and-applications-2209.10674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-perceptual-loudness-of-piano-tone-theory-and-applications-2209.10674"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-accented-text-to-speech-synthesis-2209.10804</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-accented-text-to-speech-synthesis-2209.10804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-accented-text-to-speech-synthesis-2209.10804"/></url>
<url><loc>https://scifaro.com/en/abs/the-speakin-system-description-for-cnsrc2022-2209.10846</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-speakin-system-description-for-cnsrc2022-2209.10846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-speakin-system-description-for-cnsrc2022-2209.10846"/></url>
<url><loc>https://scifaro.com/en/abs/mntts-an-open-source-mongolian-text-to-speech-synthesis-dataset-and-accompanied-baseline-2209.10848</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mntts-an-open-source-mongolian-text-to-speech-synthesis-dataset-and-accompanied-baseline-2209.10848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mntts-an-open-source-mongolian-text-to-speech-synthesis-dataset-and-accompanied-baseline-2209.10848"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-stage-multi-codebook-vq-vae-approach-to-high-performance-neural-tts-2209.10887</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-stage-multi-codebook-vq-vae-approach-to-high-performance-neural-tts-2209.10887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-stage-multi-codebook-vq-vae-approach-to-high-performance-neural-tts-2209.10887"/></url>
<url><loc>https://scifaro.com/en/abs/maths-computation-and-flamenco-overview-and-challenges-2209.10970</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maths-computation-and-flamenco-overview-and-challenges-2209.10970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maths-computation-and-flamenco-overview-and-challenges-2209.10970"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-pairwise-preferences-between-tts-audio-stimuli-using-parallel-ratings-data-and-anti-symmetric-twin-neural-networks-2209.11003</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-pairwise-preferences-between-tts-audio-stimuli-using-parallel-ratings-data-and-anti-symmetric-twin-neural-networks-2209.11003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-pairwise-preferences-between-tts-audio-stimuli-using-parallel-ratings-data-and-anti-symmetric-twin-neural-networks-2209.11003"/></url>
<url><loc>https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-monaural-speech-enhancement-2209.11112</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-monaural-speech-enhancement-2209.11112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cmgan-conformer-based-metric-gan-for-monaural-speech-enhancement-2209.11112"/></url>
<url><loc>https://scifaro.com/en/abs/the-microsoft-system-for-voxceleb-speaker-recognition-challenge-2022-2209.11266</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-microsoft-system-for-voxceleb-speaker-recognition-challenge-2022-2209.11266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-microsoft-system-for-voxceleb-speaker-recognition-challenge-2022-2209.11266"/></url>
<url><loc>https://scifaro.com/en/abs/unikw-at-unified-keyword-spotting-and-audio-tagging-2209.11377</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unikw-at-unified-keyword-spotting-and-audio-tagging-2209.11377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unikw-at-unified-keyword-spotting-and-audio-tagging-2209.11377"/></url>
<url><loc>https://scifaro.com/en/abs/an-artificial-neural-network-based-system-for-detecting-machine-failures-using-tiny-sound-data-a-case-study-2209.11527</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-artificial-neural-network-based-system-for-detecting-machine-failures-using-tiny-sound-data-a-case-study-2209.11527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-artificial-neural-network-based-system-for-detecting-machine-failures-using-tiny-sound-data-a-case-study-2209.11527"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-voice-spoofing-detection-based-on-online-hard-example-mining-2209.11585</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-voice-spoofing-detection-based-on-online-hard-example-mining-2209.11585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-voice-spoofing-detection-based-on-online-hard-example-mining-2209.11585"/></url>
<url><loc>https://scifaro.com/en/abs/the-speakin-speaker-verification-system-for-far-field-speaker-verification-challenge-2022-2209.11625</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-speakin-speaker-verification-system-for-far-field-speaker-verification-challenge-2022-2209.11625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-speakin-speaker-verification-system-for-far-field-speaker-verification-challenge-2022-2209.11625"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-perceptually-motivated-optimization-and-dual-transformations-2209.11905</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-perceptually-motivated-optimization-and-dual-transformations-2209.11905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-perceptually-motivated-optimization-and-dual-transformations-2209.11905"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speech-activity-and-overlap-detection-with-multi-exit-architecture-2209.11906</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speech-activity-and-overlap-detection-with-multi-exit-architecture-2209.11906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speech-activity-and-overlap-detection-with-multi-exit-architecture-2209.11906"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-speech-recognition-with-unsupervised-error-correction-2209.12043</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-speech-recognition-with-unsupervised-error-correction-2209.12043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-speech-recognition-with-unsupervised-error-correction-2209.12043"/></url>
<url><loc>https://scifaro.com/en/abs/song-emotion-recognition-a-performance-comparison-between-audio-features-and-artificial-neural-networks-2209.12045</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/song-emotion-recognition-a-performance-comparison-between-audio-features-and-artificial-neural-networks-2209.12045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/song-emotion-recognition-a-performance-comparison-between-audio-features-and-artificial-neural-networks-2209.12045"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-exponentially-modified-gaussian-oscillators-2209.12202</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-exponentially-modified-gaussian-oscillators-2209.12202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-exponentially-modified-gaussian-oscillators-2209.12202"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-adversarial-training-algorithm-for-multi-speaker-neural-text-to-speech-2209.12549</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-adversarial-training-algorithm-for-multi-speaker-neural-text-to-speech-2209.12549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-adversarial-training-algorithm-for-multi-speaker-neural-text-to-speech-2209.12549"/></url>
<url><loc>https://scifaro.com/en/abs/faked-speech-detection-with-zero-prior-knowledge-2209.12573</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/faked-speech-detection-with-zero-prior-knowledge-2209.12573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/faked-speech-detection-with-zero-prior-knowledge-2209.12573"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-language-mismatch-in-automatic-forensic-voice-comparison-using-deep-learning-embeddings-2209.12602</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-language-mismatch-in-automatic-forensic-voice-comparison-using-deep-learning-embeddings-2209.12602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-language-mismatch-in-automatic-forensic-voice-comparison-using-deep-learning-embeddings-2209.12602"/></url>
<url><loc>https://scifaro.com/en/abs/the-efficacy-of-self-supervised-speech-models-for-audio-representations-2209.12900</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-efficacy-of-self-supervised-speech-models-for-audio-representations-2209.12900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-efficacy-of-self-supervised-speech-models-for-audio-representations-2209.12900"/></url>
<url><loc>https://scifaro.com/en/abs/computing-melodic-templates-in-oral-music-traditions-2209.13598</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computing-melodic-templates-in-oral-music-traditions-2209.13598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computing-melodic-templates-in-oral-music-traditions-2209.13598"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-multitask-learning-architecture-for-affective-vocal-burst-analysis-2209.13914</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-multitask-learning-architecture-for-affective-vocal-burst-analysis-2209.13914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-multitask-learning-architecture-for-affective-vocal-burst-analysis-2209.13914"/></url>
<url><loc>https://scifaro.com/en/abs/mewehv-mel-and-wave-embeddings-for-human-voice-tasks-2209.14078</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mewehv-mel-and-wave-embeddings-for-human-voice-tasks-2209.14078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mewehv-mel-and-wave-embeddings-for-human-voice-tasks-2209.14078"/></url>
<url><loc>https://scifaro.com/en/abs/deepfake-audio-detection-by-speaker-verification-2209.14098</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfake-audio-detection-by-speaker-verification-2209.14098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfake-audio-detection-by-speaker-verification-2209.14098"/></url>
<url><loc>https://scifaro.com/en/abs/audio-barlow-twins-self-supervised-audio-representation-learning-2209.14345</loc><lastmod>2022-12-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-barlow-twins-self-supervised-audio-representation-learning-2209.14345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-barlow-twins-self-supervised-audio-representation-learning-2209.14345"/></url>
<url><loc>https://scifaro.com/en/abs/the-chamber-ensemble-generator-limitless-high-quality-mir-data-via-generative-modeling-2209.14458</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-chamber-ensemble-generator-limitless-high-quality-mir-data-via-generative-modeling-2209.14458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-chamber-ensemble-generator-limitless-high-quality-mir-data-via-generative-modeling-2209.14458"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-vocal-bursts-for-acii-2022-a-vb-type-competition-using-convolutional-neural-networks-and-deep-acoustic-embeddings-2209.14842</loc><lastmod>2022-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-vocal-bursts-for-acii-2022-a-vb-type-competition-using-convolutional-neural-networks-and-deep-acoustic-embeddings-2209.14842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-vocal-bursts-for-acii-2022-a-vb-type-competition-using-convolutional-neural-networks-and-deep-acoustic-embeddings-2209.14842"/></url>
<url><loc>https://scifaro.com/en/abs/convrnn-t-convolutional-augmented-recurrent-neural-network-transducers-for-streaming-speech-recognition-2209.14868</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convrnn-t-convolutional-augmented-recurrent-neural-network-transducers-for-streaming-speech-recognition-2209.14868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convrnn-t-convolutional-augmented-recurrent-neural-network-transducers-for-streaming-speech-recognition-2209.14868"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-of-weakly-supervised-audio-tagging-embeddings-for-general-audio-representations-2209.15167</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-of-weakly-supervised-audio-tagging-embeddings-for-general-audio-representations-2209.15167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-of-weakly-supervised-audio-tagging-embeddings-for-general-audio-representations-2209.15167"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-encoder-decoder-architecture-with-top-down-attention-for-speech-separation-2209.15200</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-encoder-decoder-architecture-with-top-down-attention-for-speech-separation-2209.15200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-encoder-decoder-architecture-with-top-down-attention-for-speech-separation-2209.15200"/></url>
<url><loc>https://scifaro.com/en/abs/wake-word-detection-based-on-res2net-2209.15296</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wake-word-detection-based-on-res2net-2209.15296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wake-word-detection-based-on-res2net-2209.15296"/></url>
<url><loc>https://scifaro.com/en/abs/symphony-localizing-multiple-acoustic-sources-with-a-single-microphone-array-2209.15325</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symphony-localizing-multiple-acoustic-sources-with-a-single-microphone-array-2209.15325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symphony-localizing-multiple-acoustic-sources-with-a-single-microphone-array-2209.15325"/></url>
<url><loc>https://scifaro.com/en/abs/chordmics-acoustic-signal-purification-with-distributed-microphones-2209.15334</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chordmics-acoustic-signal-purification-with-distributed-microphones-2209.15334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chordmics-acoustic-signal-purification-with-distributed-microphones-2209.15334"/></url>
<url><loc>https://scifaro.com/en/abs/audiogen-textually-guided-audio-generation-2209.15352</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiogen-textually-guided-audio-generation-2209.15352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiogen-textually-guided-audio-generation-2209.15352"/></url>
<url><loc>https://scifaro.com/en/abs/match-to-win-analysing-sequences-lengths-for-efficient-self-supervised-learning-in-speech-and-audio-2209.15575</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/match-to-win-analysing-sequences-lengths-for-efficient-self-supervised-learning-in-speech-and-audio-2209.15575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/match-to-win-analysing-sequences-lengths-for-efficient-self-supervised-learning-in-speech-and-audio-2209.15575"/></url>
<url><loc>https://scifaro.com/en/abs/hsd-a-hierarchical-singing-annotation-dataset-2209.15640</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hsd-a-hierarchical-singing-annotation-dataset-2209.15640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hsd-a-hierarchical-singing-annotation-dataset-2209.15640"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stage-progressive-compression-of-conformer-transducer-for-on-device-speech-recognition-2210.00169</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stage-progressive-compression-of-conformer-transducer-for-on-device-speech-recognition-2210.00169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stage-progressive-compression-of-conformer-transducer-for-on-device-speech-recognition-2210.00169"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-acoustic-feature-transformation-in-mismatched-environments-using-a-guided-gan-2210.00721</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-acoustic-feature-transformation-in-mismatched-environments-using-a-guided-gan-2210.00721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-acoustic-feature-transformation-in-mismatched-environments-using-a-guided-gan-2210.00721"/></url>
<url><loc>https://scifaro.com/en/abs/push-pull-characterizing-the-adversarial-robustness-for-audio-visual-active-speaker-detection-2210.00753</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/push-pull-characterizing-the-adversarial-robustness-for-audio-visual-active-speaker-detection-2210.00753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/push-pull-characterizing-the-adversarial-robustness-for-audio-visual-active-speaker-detection-2210.00753"/></url>
<url><loc>https://scifaro.com/en/abs/and-what-if-two-musical-versions-don-t-share-melody-harmony-rhythm-or-lyrics-2210.01256</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/and-what-if-two-musical-versions-don-t-share-melody-harmony-rhythm-or-lyrics-2210.01256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/and-what-if-two-musical-versions-don-t-share-melody-harmony-rhythm-or-lyrics-2210.01256"/></url>
<url><loc>https://scifaro.com/en/abs/pay-self-attention-to-audio-visual-navigation-2210.01353</loc><lastmod>2022-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pay-self-attention-to-audio-visual-navigation-2210.01353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pay-self-attention-to-audio-visual-navigation-2210.01353"/></url>
<url><loc>https://scifaro.com/en/abs/rhythmic-gesticulator-rhythm-aware-co-speech-gesture-synthesis-with-hierarchical-neural-embeddings-2210.01448</loc><lastmod>2023-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythmic-gesticulator-rhythm-aware-co-speech-gesture-synthesis-with-hierarchical-neural-embeddings-2210.01448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythmic-gesticulator-rhythm-aware-co-speech-gesture-synthesis-with-hierarchical-neural-embeddings-2210.01448"/></url>
<url><loc>https://scifaro.com/en/abs/improving-label-deficient-keyword-spotting-through-self-supervised-pretraining-2210.01703</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-label-deficient-keyword-spotting-through-self-supervised-pretraining-2210.01703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-label-deficient-keyword-spotting-through-self-supervised-pretraining-2210.01703"/></url>
<url><loc>https://scifaro.com/en/abs/learning-temporal-resolution-in-spectrogram-for-audio-classification-2210.01719</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-temporal-resolution-in-spectrogram-for-audio-classification-2210.01719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-temporal-resolution-in-spectrogram-for-audio-classification-2210.01719"/></url>
<url><loc>https://scifaro.com/en/abs/tc-sknet-with-gridmask-for-low-complexity-classification-of-acoustic-scene-2210.02287</loc><lastmod>2022-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tc-sknet-with-gridmask-for-low-complexity-classification-of-acoustic-scene-2210.02287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tc-sknet-with-gridmask-for-low-complexity-classification-of-acoustic-scene-2210.02287"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2021-towards-spoofed-and-deepfake-speech-detection-in-the-wild-2210.02437</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2021-towards-spoofed-and-deepfake-speech-detection-in-the-wild-2210.02437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2021-towards-spoofed-and-deepfake-speech-detection-in-the-wild-2210.02437"/></url>
<url><loc>https://scifaro.com/en/abs/feasibility-on-detecting-door-slamming-towards-monitoring-early-signs-of-domestic-violence-2210.02642</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feasibility-on-detecting-door-slamming-towards-monitoring-early-signs-of-domestic-violence-2210.02642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feasibility-on-detecting-door-slamming-towards-monitoring-early-signs-of-domestic-violence-2210.02642"/></url>
<url><loc>https://scifaro.com/en/abs/psvrf-learning-to-restore-pitch-shifted-voice-without-reference-2210.02731</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psvrf-learning-to-restore-pitch-shifted-voice-without-reference-2210.02731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psvrf-learning-to-restore-pitch-shifted-voice-without-reference-2210.02731"/></url>
<url><loc>https://scifaro.com/en/abs/the-sound-of-silence-efficiency-of-first-digit-features-in-synthetic-audio-detection-2210.02746</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sound-of-silence-efficiency-of-first-digit-features-in-synthetic-audio-detection-2210.02746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sound-of-silence-efficiency-of-first-digit-features-in-synthetic-audio-detection-2210.02746"/></url>
<url><loc>https://scifaro.com/en/abs/melody-infilling-with-user-provided-structural-context-2210.02829</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-infilling-with-user-provided-structural-context-2210.02829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-infilling-with-user-provided-structural-context-2210.02829"/></url>
<url><loc>https://scifaro.com/en/abs/wakeupnet-a-mobile-transformer-based-framework-for-end-to-end-streaming-voice-trigger-2210.02904</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wakeupnet-a-mobile-transformer-based-framework-for-end-to-end-streaming-voice-trigger-2210.02904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wakeupnet-a-mobile-transformer-based-framework-for-end-to-end-streaming-voice-trigger-2210.02904"/></url>
<url><loc>https://scifaro.com/en/abs/animetab-a-new-guitar-tablature-dataset-of-anime-and-game-music-2210.03027</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/animetab-a-new-guitar-tablature-dataset-of-anime-and-game-music-2210.03027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/animetab-a-new-guitar-tablature-dataset-of-anime-and-game-music-2210.03027"/></url>
<url><loc>https://scifaro.com/en/abs/damage-control-during-domain-adaptation-for-transducer-based-automatic-speech-recognition-2210.03255</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/damage-control-during-domain-adaptation-for-transducer-based-automatic-speech-recognition-2210.03255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/damage-control-during-domain-adaptation-for-transducer-based-automatic-speech-recognition-2210.03255"/></url>
<url><loc>https://scifaro.com/en/abs/the-perspectiveliberator-an-upmixing-6dof-rendering-plugin-for-single-perspective-ambisonic-room-impulse-responses-2210.03360</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-perspectiveliberator-an-upmixing-6dof-rendering-plugin-for-single-perspective-ambisonic-room-impulse-responses-2210.03360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-perspectiveliberator-an-upmixing-6dof-rendering-plugin-for-single-perspective-ambisonic-room-impulse-responses-2210.03360"/></url>
<url><loc>https://scifaro.com/en/abs/model-based-estimation-of-in-car-communication-feedback-applied-to-speech-zone-detection-2210.03363</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-based-estimation-of-in-car-communication-feedback-applied-to-speech-zone-detection-2210.03363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-based-estimation-of-in-car-communication-feedback-applied-to-speech-zone-detection-2210.03363"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-affective-speech-synthesis-and-conversion-in-the-deep-learning-era-2210.03538</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-affective-speech-synthesis-and-conversion-in-the-deep-learning-era-2210.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-affective-speech-synthesis-and-conversion-in-the-deep-learning-era-2210.03538"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-and-unsupervised-learning-of-audio-representations-for-music-understanding-2210.03799</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-and-unsupervised-learning-of-audio-representations-for-music-understanding-2210.03799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-and-unsupervised-learning-of-audio-representations-for-music-understanding-2210.03799"/></url>
<url><loc>https://scifaro.com/en/abs/cobert-self-supervised-speech-representation-learning-through-code-representation-learning-2210.04062</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cobert-self-supervised-speech-representation-learning-through-code-representation-learning-2210.04062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cobert-self-supervised-speech-representation-learning-through-code-representation-learning-2210.04062"/></url>
<url><loc>https://scifaro.com/en/abs/automated-audio-captioning-via-fusion-of-low-and-high-dimensional-features-2210.05037</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-audio-captioning-via-fusion-of-low-and-high-dimensional-features-2210.05037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-audio-captioning-via-fusion-of-low-and-high-dimensional-features-2210.05037"/></url>
<url><loc>https://scifaro.com/en/abs/conchshell-a-generative-adversarial-networks-that-turns-pictures-into-piano-music-2210.05076</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conchshell-a-generative-adversarial-networks-that-turns-pictures-into-piano-music-2210.05076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conchshell-a-generative-adversarial-networks-that-turns-pictures-into-piano-music-2210.05076"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-tencent-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.05092</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-tencent-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.05092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-tencent-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.05092"/></url>
<url><loc>https://scifaro.com/en/abs/diffroll-diffusion-based-generative-music-transcription-with-unsupervised-pretraining-capability-2210.05148</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffroll-diffusion-based-generative-music-transcription-with-unsupervised-pretraining-capability-2210.05148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffroll-diffusion-based-generative-music-transcription-with-unsupervised-pretraining-capability-2210.05148"/></url>
<url><loc>https://scifaro.com/en/abs/deep-spectro-temporal-artifacts-for-detecting-synthesized-speech-2210.05254</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-spectro-temporal-artifacts-for-detecting-synthesized-speech-2210.05254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-spectro-temporal-artifacts-for-detecting-synthesized-speech-2210.05254"/></url>
<url><loc>https://scifaro.com/en/abs/mfcca-multi-frame-cross-channel-attention-for-multi-speaker-asr-in-multi-party-meeting-scenario-2210.05265</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfcca-multi-frame-cross-channel-attention-for-multi-speaker-asr-in-multi-party-meeting-scenario-2210.05265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfcca-multi-frame-cross-channel-attention-for-multi-speaker-asr-in-multi-party-meeting-scenario-2210.05265"/></url>
<url><loc>https://scifaro.com/en/abs/gan-you-hear-me-reclaiming-unconditional-speech-synthesis-from-diffusion-models-2210.05271</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gan-you-hear-me-reclaiming-unconditional-speech-synthesis-from-diffusion-models-2210.05271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gan-you-hear-me-reclaiming-unconditional-speech-synthesis-from-diffusion-models-2210.05271"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-study-on-private-aggregation-of-teacher-ensemble-learning-for-end-to-end-speech-recognition-2210.05614</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-study-on-private-aggregation-of-teacher-ensemble-learning-for-end-to-end-speech-recognition-2210.05614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-study-on-private-aggregation-of-teacher-ensemble-learning-for-end-to-end-speech-recognition-2210.05614"/></url>
<url><loc>https://scifaro.com/en/abs/enemy-spotted-in-game-gun-sound-dataset-for-gunshot-classification-and-localization-2210.05917</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enemy-spotted-in-game-gun-sound-dataset-for-gunshot-classification-and-localization-2210.05917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enemy-spotted-in-game-gun-sound-dataset-for-gunshot-classification-and-localization-2210.05917"/></url>
<url><loc>https://scifaro.com/en/abs/jukedrummer-conditional-beat-aware-audio-domain-drum-accompaniment-generation-via-transformer-vq-vae-2210.06007</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jukedrummer-conditional-beat-aware-audio-domain-drum-accompaniment-generation-via-transformer-vq-vae-2210.06007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jukedrummer-conditional-beat-aware-audio-domain-drum-accompaniment-generation-via-transformer-vq-vae-2210.06007"/></url>
<url><loc>https://scifaro.com/en/abs/specrnet-towards-faster-and-more-accessible-audio-deepfake-detection-2210.06105</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specrnet-towards-faster-and-more-accessible-audio-deepfake-detection-2210.06105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specrnet-towards-faster-and-more-accessible-audio-deepfake-detection-2210.06105"/></url>
<url><loc>https://scifaro.com/en/abs/thuee-system-description-for-nist-2020-sre-cts-challenge-2210.06111</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thuee-system-description-for-nist-2020-sre-cts-challenge-2210.06111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thuee-system-description-for-nist-2020-sre-cts-challenge-2210.06111"/></url>
<url><loc>https://scifaro.com/en/abs/individualized-conditioning-and-negative-distances-for-speaker-separation-2210.06368</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/individualized-conditioning-and-negative-distances-for-speaker-separation-2210.06368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/individualized-conditioning-and-negative-distances-for-speaker-separation-2210.06368"/></url>
<url><loc>https://scifaro.com/en/abs/anonymizing-speech-with-generative-adversarial-networks-to-preserve-speaker-privacy-2210.07002</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anonymizing-speech-with-generative-adversarial-networks-to-preserve-speaker-privacy-2210.07002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anonymizing-speech-with-generative-adversarial-networks-to-preserve-speaker-privacy-2210.07002"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-diffusion-models-for-singing-voice-neural-vocoder-2210.07508</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-diffusion-models-for-singing-voice-neural-vocoder-2210.07508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-diffusion-models-for-singing-voice-neural-vocoder-2210.07508"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-speech-synthesizer-attribution-in-an-open-set-scenario-2210.07546</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-speech-synthesizer-attribution-in-an-open-set-scenario-2210.07546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-speech-synthesizer-attribution-in-an-open-set-scenario-2210.07546"/></url>
<url><loc>https://scifaro.com/en/abs/empirical-study-incorporating-linguistic-knowledge-on-filled-pauses-for-personalized-spontaneous-speech-synthesis-2210.07559</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empirical-study-incorporating-linguistic-knowledge-on-filled-pauses-for-personalized-spontaneous-speech-synthesis-2210.07559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empirical-study-incorporating-linguistic-knowledge-on-filled-pauses-for-personalized-spontaneous-speech-synthesis-2210.07559"/></url>
<url><loc>https://scifaro.com/en/abs/training-speech-emotion-classifier-without-categorical-annotations-2210.07642</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-speech-emotion-classifier-without-categorical-annotations-2210.07642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-speech-emotion-classifier-without-categorical-annotations-2210.07642"/></url>
<url><loc>https://scifaro.com/en/abs/full-stack-bioacoustics-field-kit-to-ai-to-action-workshop-report-2210.07685</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-stack-bioacoustics-field-kit-to-ai-to-action-workshop-report-2210.07685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-stack-bioacoustics-field-kit-to-ai-to-action-workshop-report-2210.07685"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-rnn-based-speech-enhancement-on-a-multi-core-mcu-with-mixed-fp16-int8-post-training-quantization-2210.07692</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-rnn-based-speech-enhancement-on-a-multi-core-mcu-with-mixed-fp16-int8-post-training-quantization-2210.07692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-rnn-based-speech-enhancement-on-a-multi-core-mcu-with-mixed-fp16-int8-post-training-quantization-2210.07692"/></url>
<url><loc>https://scifaro.com/en/abs/improving-generalizability-of-distilled-self-supervised-speech-processing-models-under-distorted-settings-2210.07978</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-generalizability-of-distilled-self-supervised-speech-processing-models-under-distorted-settings-2210.07978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-generalizability-of-distilled-self-supervised-speech-processing-models-under-distorted-settings-2210.07978"/></url>
<url><loc>https://scifaro.com/en/abs/learning-invariant-representation-and-risk-minimized-for-unsupervised-accent-domain-adaptation-2210.08182</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-invariant-representation-and-risk-minimized-for-unsupervised-accent-domain-adaptation-2210.08182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-invariant-representation-and-risk-minimized-for-unsupervised-accent-domain-adaptation-2210.08182"/></url>
<url><loc>https://scifaro.com/en/abs/a-policy-based-approach-to-the-specaugment-method-for-low-resource-e2e-asr-2210.08520</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-policy-based-approach-to-the-specaugment-method-for-low-resource-e2e-asr-2210.08520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-policy-based-approach-to-the-specaugment-method-for-low-resource-e2e-asr-2210.08520"/></url>
<url><loc>https://scifaro.com/en/abs/robust-general-and-low-complexity-acoustic-scene-classification-systems-and-an-effective-visualization-for-presenting-a-sound-scene-context-2210.08610</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-general-and-low-complexity-acoustic-scene-classification-systems-and-an-effective-visualization-for-presenting-a-sound-scene-context-2210.08610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-general-and-low-complexity-acoustic-scene-classification-systems-and-an-effective-visualization-for-presenting-a-sound-scene-context-2210.08610"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-leverage-dnn-based-speech-enhancement-for-multi-channel-speaker-verification-2210.08834</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-leverage-dnn-based-speech-enhancement-for-multi-channel-speaker-verification-2210.08834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-leverage-dnn-based-speech-enhancement-for-multi-channel-speaker-verification-2210.08834"/></url>
<url><loc>https://scifaro.com/en/abs/visual-onoma-to-wave-environmental-sound-synthesis-from-visual-onomatopoeias-and-sound-source-images-2210.09173</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-onoma-to-wave-environmental-sound-synthesis-from-visual-onomatopoeias-and-sound-source-images-2210.09173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-onoma-to-wave-environmental-sound-synthesis-from-visual-onomatopoeias-and-sound-source-images-2210.09173"/></url>
<url><loc>https://scifaro.com/en/abs/sub-8-bit-quantization-for-on-device-speech-recognition-a-regularization-free-approach-2210.09188</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-8-bit-quantization-for-on-device-speech-recognition-a-regularization-free-approach-2210.09188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-8-bit-quantization-for-on-device-speech-recognition-a-regularization-free-approach-2210.09188"/></url>
<url><loc>https://scifaro.com/en/abs/svldl-improved-speaker-age-estimation-using-selective-variance-label-distribution-learning-2210.09524</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svldl-improved-speaker-age-estimation-using-selective-variance-label-distribution-learning-2210.09524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svldl-improved-speaker-age-estimation-using-selective-variance-label-distribution-learning-2210.09524"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-system-of-sound-event-detection-transformer-and-frame-wise-model-for-dcase-2022-task-4-2210.09529</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-system-of-sound-event-detection-transformer-and-frame-wise-model-for-dcase-2022-task-4-2210.09529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-system-of-sound-event-detection-transformer-and-frame-wise-model-for-dcase-2022-task-4-2210.09529"/></url>
<url><loc>https://scifaro.com/en/abs/improving-robustness-of-spontaneous-speech-synthesis-with-linguistic-speech-regularization-and-pseudo-filled-pause-insertion-2210.09815</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-robustness-of-spontaneous-speech-synthesis-with-linguistic-speech-regularization-and-pseudo-filled-pause-insertion-2210.09815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-robustness-of-spontaneous-speech-synthesis-with-linguistic-speech-regularization-and-pseudo-filled-pause-insertion-2210.09815"/></url>
<url><loc>https://scifaro.com/en/abs/mid-attribute-speaker-generation-using-optimal-transport-based-interpolation-of-gaussian-mixture-models-2210.09916</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mid-attribute-speaker-generation-using-optimal-transport-based-interpolation-of-gaussian-mixture-models-2210.09916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mid-attribute-speaker-generation-using-optimal-transport-based-interpolation-of-gaussian-mixture-models-2210.09916"/></url>
<url><loc>https://scifaro.com/en/abs/hmm-vs-ctc-for-automatic-speech-recognition-comparison-based-on-full-sum-training-from-scratch-2210.09951</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hmm-vs-ctc-for-automatic-speech-recognition-comparison-based-on-full-sum-training-from-scratch-2210.09951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hmm-vs-ctc-for-automatic-speech-recognition-comparison-based-on-full-sum-training-from-scratch-2210.09951"/></url>
<url><loc>https://scifaro.com/en/abs/birdsoundsdenoising-deep-visual-audio-denoising-for-bird-sounds-2210.10196</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/birdsoundsdenoising-deep-visual-audio-denoising-for-bird-sounds-2210.10196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/birdsoundsdenoising-deep-visual-audio-denoising-for-bird-sounds-2210.10196"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-age-invariant-training-for-child-acoustic-modeling-using-adversarial-multi-task-learning-2210.10231</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-age-invariant-training-for-child-acoustic-modeling-using-adversarial-multi-task-learning-2210.10231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-age-invariant-training-for-child-acoustic-modeling-using-adversarial-multi-task-learning-2210.10231"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-training-method-for-japanese-electrolaryngeal-speech-enhancement-based-on-sequence-to-sequence-voice-conversion-2210.10314</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-training-method-for-japanese-electrolaryngeal-speech-enhancement-based-on-sequence-to-sequence-voice-conversion-2210.10314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-training-method-for-japanese-electrolaryngeal-speech-enhancement-based-on-sequence-to-sequence-voice-conversion-2210.10314"/></url>
<url><loc>https://scifaro.com/en/abs/museformer-transformer-with-fine-and-coarse-grained-attention-for-music-generation-2210.10349</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/museformer-transformer-with-fine-and-coarse-grained-attention-for-music-generation-2210.10349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/museformer-transformer-with-fine-and-coarse-grained-attention-for-music-generation-2210.10349"/></url>
<url><loc>https://scifaro.com/en/abs/audio-tampering-detection-based-on-shallow-and-deep-feature-representation-learning-2210.10506</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-tampering-detection-based-on-shallow-and-deep-feature-representation-learning-2210.10506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-tampering-detection-based-on-shallow-and-deep-feature-representation-learning-2210.10506"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-dereverberation-beamforming-and-self-supervised-learning-representation-2210.10742</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-dereverberation-beamforming-and-self-supervised-learning-representation-2210.10742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-recognition-dereverberation-beamforming-and-self-supervised-learning-representation-2210.10742"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-animal-vocalizations-through-synthesizers-2210.10857</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-animal-vocalizations-through-synthesizers-2210.10857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-animal-vocalizations-through-synthesizers-2210.10857"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-learning-of-generalised-representations-for-speaker-recognition-2210.10985</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-learning-of-generalised-representations-for-speaker-recognition-2210.10985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-learning-of-generalised-representations-for-speaker-recognition-2210.10985"/></url>
<url><loc>https://scifaro.com/en/abs/robust-one-shot-singing-voice-conversion-2210.11096</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-one-shot-singing-voice-conversion-2210.11096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-one-shot-singing-voice-conversion-2210.11096"/></url>
<url><loc>https://scifaro.com/en/abs/play-it-back-iterative-attention-for-audio-recognition-2210.11328</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/play-it-back-iterative-attention-for-audio-recognition-2210.11328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/play-it-back-iterative-attention-for-audio-recognition-2210.11328"/></url>
<url><loc>https://scifaro.com/en/abs/text-enhancement-for-paragraph-processing-in-end-to-end-code-switching-tts-2210.11429</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-enhancement-for-paragraph-processing-in-end-to-end-code-switching-tts-2210.11429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-enhancement-for-paragraph-processing-in-end-to-end-code-switching-tts-2210.11429"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-re-calibration-of-channel-wise-features-for-adversarial-audio-classification-2210.11722</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-re-calibration-of-channel-wise-features-for-adversarial-audio-classification-2210.11722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-re-calibration-of-channel-wise-features-for-adversarial-audio-classification-2210.11722"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-permutation-invariant-training-for-universal-sound-separation-2210.12108</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-permutation-invariant-training-for-universal-sound-separation-2210.12108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-permutation-invariant-training-for-universal-sound-separation-2210.12108"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-bilingual-neural-transducer-with-synthetic-code-switching-text-generation-2210.12214</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-bilingual-neural-transducer-with-synthetic-code-switching-text-generation-2210.12214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-bilingual-neural-transducer-with-synthetic-code-switching-text-generation-2210.12214"/></url>
<url><loc>https://scifaro.com/en/abs/beans-the-benchmark-of-animal-sounds-2210.12300</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beans-the-benchmark-of-animal-sounds-2210.12300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beans-the-benchmark-of-animal-sounds-2210.12300"/></url>
<url><loc>https://scifaro.com/en/abs/neural-sound-field-decomposition-with-super-resolution-of-sound-direction-2210.12345</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-sound-field-decomposition-with-super-resolution-of-sound-direction-2210.12345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-sound-field-decomposition-with-super-resolution-of-sound-direction-2210.12345"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-via-an-attentive-time-frequency-neural-network-2210.12430</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-via-an-attentive-time-frequency-neural-network-2210.12430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-via-an-attentive-time-frequency-neural-network-2210.12430"/></url>
<url><loc>https://scifaro.com/en/abs/gct-gated-contextual-transformer-for-sequential-audio-tagging-2210.12541</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gct-gated-contextual-transformer-for-sequential-audio-tagging-2210.12541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gct-gated-contextual-transformer-for-sequential-audio-tagging-2210.12541"/></url>
<url><loc>https://scifaro.com/en/abs/quantitative-evidence-on-overlooked-aspects-of-enrollment-speaker-embeddings-for-target-speaker-separation-2210.12635</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantitative-evidence-on-overlooked-aspects-of-enrollment-speaker-embeddings-for-target-speaker-separation-2210.12635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantitative-evidence-on-overlooked-aspects-of-enrollment-speaker-embeddings-for-target-speaker-separation-2210.12635"/></url>
<url><loc>https://scifaro.com/en/abs/10-hours-data-is-all-you-need-2210.13067</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/10-hours-data-is-all-you-need-2210.13067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/10-hours-data-is-all-you-need-2210.13067"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-clustering-aware-learning-of-embeddings-for-speaker-diarisation-2210.13576</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-clustering-aware-learning-of-embeddings-for-speaker-diarisation-2210.13576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-clustering-aware-learning-of-embeddings-for-speaker-diarisation-2210.13576"/></url>
<url><loc>https://scifaro.com/en/abs/adapitch-adaption-multi-speaker-text-to-speech-conditioned-on-pitch-disentangling-with-untranscribed-data-2210.13803</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapitch-adaption-multi-speaker-text-to-speech-conditioned-on-pitch-disentangling-with-untranscribed-data-2210.13803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapitch-adaption-multi-speaker-text-to-speech-conditioned-on-pitch-disentangling-with-untranscribed-data-2210.13803"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-representation-learning-via-speech-level-and-phoneme-level-masking-approach-2210.13805</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-representation-learning-via-speech-level-and-phoneme-level-masking-approach-2210.13805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-representation-learning-via-speech-level-and-phoneme-level-masking-approach-2210.13805"/></url>
<url><loc>https://scifaro.com/en/abs/metaspeech-speech-effects-switch-along-with-environment-for-metaverse-2210.13811</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metaspeech-speech-effects-switch-along-with-environment-for-metaverse-2210.13811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metaspeech-speech-effects-switch-along-with-environment-for-metaverse-2210.13811"/></url>
<url><loc>https://scifaro.com/en/abs/coloc-conditioned-localizer-and-classifier-for-sound-event-localization-and-detection-2210.13932</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coloc-conditioned-localizer-and-classifier-for-sound-event-localization-and-detection-2210.13932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coloc-conditioned-localizer-and-classifier-for-sound-event-localization-and-detection-2210.13932"/></url>
<url><loc>https://scifaro.com/en/abs/audio-mfcc-gram-transformers-for-respiratory-insufficiency-detection-in-covid-19-2210.14085</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-mfcc-gram-transformers-for-respiratory-insufficiency-detection-in-covid-19-2210.14085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-mfcc-gram-transformers-for-respiratory-insufficiency-detection-in-covid-19-2210.14085"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-speech-endpoint-detection-with-regression-targets-2210.14252</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-speech-endpoint-detection-with-regression-targets-2210.14252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-speech-endpoint-detection-with-regression-targets-2210.14252"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-aslp-system-for-the-iscslp-2022-magichub-code-swiching-asr-challenge-2210.14448</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-aslp-system-for-the-iscslp-2022-magichub-code-swiching-asr-challenge-2210.14448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-aslp-system-for-the-iscslp-2022-magichub-code-swiching-asr-challenge-2210.14448"/></url>
<url><loc>https://scifaro.com/en/abs/scp-gan-self-correcting-discriminator-optimization-for-training-consistency-preserving-metric-gan-on-speech-enhancement-tasks-2210.14474</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scp-gan-self-correcting-discriminator-optimization-for-training-consistency-preserving-metric-gan-on-speech-enhancement-tasks-2210.14474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scp-gan-self-correcting-discriminator-optimization-for-training-consistency-preserving-metric-gan-on-speech-enhancement-tasks-2210.14474"/></url>
<url><loc>https://scifaro.com/en/abs/aves-animal-vocalization-encoder-based-on-self-supervision-2210.14493</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aves-animal-vocalization-encoder-based-on-self-supervision-2210.14493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aves-animal-vocalization-encoder-based-on-self-supervision-2210.14493"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-dimensional-emotion-recognition-by-fusing-predictions-of-acoustic-and-text-networks-using-svm-2210.14495</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-dimensional-emotion-recognition-by-fusing-predictions-of-acoustic-and-text-networks-using-svm-2210.14495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-dimensional-emotion-recognition-by-fusing-predictions-of-acoustic-and-text-networks-using-svm-2210.14495"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-gated-neural-network-with-attention-mechanism-for-speech-enhancement-2210.14509</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-gated-neural-network-with-attention-mechanism-for-speech-enhancement-2210.14509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-gated-neural-network-with-attention-mechanism-for-speech-enhancement-2210.14509"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-data-mosaicing-with-simulation-based-inference-2210.14602</loc><lastmod>2023-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-data-mosaicing-with-simulation-based-inference-2210.14602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-data-mosaicing-with-simulation-based-inference-2210.14602"/></url>
<url><loc>https://scifaro.com/en/abs/fast-yet-effective-speech-emotion-recognition-with-self-distillation-2210.14636</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-yet-effective-speech-emotion-recognition-with-self-distillation-2210.14636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-yet-effective-speech-emotion-recognition-with-self-distillation-2210.14636"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-based-on-multi-channel-microphone-array-in-small-scale-meeting-2210.14644</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-based-on-multi-channel-microphone-array-in-small-scale-meeting-2210.14644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-based-on-multi-channel-microphone-array-in-small-scale-meeting-2210.14644"/></url>
<url><loc>https://scifaro.com/en/abs/tsup-speaker-diarization-system-for-conversational-short-phrase-speaker-diarization-challenge-2210.14653</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tsup-speaker-diarization-system-for-conversational-short-phrase-speaker-diarization-challenge-2210.14653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tsup-speaker-diarization-system-for-conversational-short-phrase-speaker-diarization-challenge-2210.14653"/></url>
<url><loc>https://scifaro.com/en/abs/full-band-general-audio-synthesis-with-score-based-diffusion-2210.14661</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-band-general-audio-synthesis-with-score-based-diffusion-2210.14661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-band-general-audio-synthesis-with-score-based-diffusion-2210.14661"/></url>
<url><loc>https://scifaro.com/en/abs/in-search-of-strong-embedding-extractors-for-speaker-diarisation-2210.14682</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-search-of-strong-embedding-extractors-for-speaker-diarisation-2210.14682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-search-of-strong-embedding-extractors-for-speaker-diarisation-2210.14682"/></url>
<url><loc>https://scifaro.com/en/abs/pronunciation-generation-for-foreign-language-words-in-intra-sentential-code-switching-speech-recognition-2210.14691</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pronunciation-generation-for-foreign-language-words-in-intra-sentential-code-switching-speech-recognition-2210.14691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pronunciation-generation-for-foreign-language-words-in-intra-sentential-code-switching-speech-recognition-2210.14691"/></url>
<url><loc>https://scifaro.com/en/abs/pretrained-audio-neural-networks-for-speech-emotion-recognition-in-portuguese-2210.14716</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretrained-audio-neural-networks-for-speech-emotion-recognition-in-portuguese-2210.14716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretrained-audio-neural-networks-for-speech-emotion-recognition-in-portuguese-2210.14716"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-based-on-reference-model-for-low-resource-tts-2210.14723</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-based-on-reference-model-for-low-resource-tts-2210.14723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-based-on-reference-model-for-low-resource-tts-2210.14723"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-synthesis-from-dark-data-with-evaluation-in-the-loop-data-selection-2210.14850</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-synthesis-from-dark-data-with-evaluation-in-the-loop-data-selection-2210.14850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-synthesis-from-dark-data-with-evaluation-in-the-loop-data-selection-2210.14850"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-transfer-for-on-device-speech-emotion-recognition-with-neural-structured-learning-2210.14977</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-transfer-for-on-device-speech-emotion-recognition-with-neural-structured-learning-2210.14977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-transfer-for-on-device-speech-emotion-recognition-with-neural-structured-learning-2210.14977"/></url>
<url><loc>https://scifaro.com/en/abs/towards-high-quality-neural-tts-for-low-resource-languages-by-learning-compact-speech-representations-2210.15131</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-high-quality-neural-tts-for-low-resource-languages-by-learning-compact-speech-representations-2210.15131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-high-quality-neural-tts-for-low-resource-languages-by-learning-compact-speech-representations-2210.15131"/></url>
<url><loc>https://scifaro.com/en/abs/v-cloak-intelligibility-naturalness-timbre-preserving-real-time-voice-anonymization-2210.15140</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/v-cloak-intelligibility-naturalness-timbre-preserving-real-time-voice-anonymization-2210.15140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/v-cloak-intelligibility-naturalness-timbre-preserving-real-time-voice-anonymization-2210.15140"/></url>
<url><loc>https://scifaro.com/en/abs/audio-signal-enhancement-with-learning-from-positive-and-unlabelled-data-2210.15143</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-signal-enhancement-with-learning-from-positive-and-unlabelled-data-2210.15143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-signal-enhancement-with-learning-from-positive-and-unlabelled-data-2210.15143"/></url>
<url><loc>https://scifaro.com/en/abs/articulation-gan-unsupervised-modeling-of-articulatory-learning-2210.15173</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/articulation-gan-unsupervised-modeling-of-articulatory-learning-2210.15173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/articulation-gan-unsupervised-modeling-of-articulatory-learning-2210.15173"/></url>
<url><loc>https://scifaro.com/en/abs/a-knowledge-driven-vowel-based-approach-of-depression-classification-from-speech-using-data-augmentation-2210.15261</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-knowledge-driven-vowel-based-approach-of-depression-classification-from-speech-using-data-augmentation-2210.15261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-knowledge-driven-vowel-based-approach-of-depression-classification-from-speech-using-data-augmentation-2210.15261"/></url>
<url><loc>https://scifaro.com/en/abs/on-out-of-distribution-detection-for-audio-with-deep-nearest-neighbors-2210.15283</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-out-of-distribution-detection-for-audio-with-deep-nearest-neighbors-2210.15283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-out-of-distribution-detection-for-audio-with-deep-nearest-neighbors-2210.15283"/></url>
<url><loc>https://scifaro.com/en/abs/san-a-robust-end-to-end-asr-model-architecture-2210.15285</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/san-a-robust-end-to-end-asr-model-architecture-2210.15285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/san-a-robust-end-to-end-asr-model-architecture-2210.15285"/></url>
<url><loc>https://scifaro.com/en/abs/deformable-temporal-convolutional-networks-for-monaural-noisy-reverberant-speech-separation-2210.15305</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deformable-temporal-convolutional-networks-for-monaural-noisy-reverberant-speech-separation-2210.15305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deformable-temporal-convolutional-networks-for-monaural-noisy-reverberant-speech-separation-2210.15305"/></url>
<url><loc>https://scifaro.com/en/abs/rigid-body-sound-synthesis-with-differentiable-modal-resonators-2210.15306</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rigid-body-sound-synthesis-with-differentiable-modal-resonators-2210.15306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rigid-body-sound-synthesis-with-differentiable-modal-resonators-2210.15306"/></url>
<url><loc>https://scifaro.com/en/abs/convolutive-block-matching-segmentation-algorithm-with-application-to-music-structure-analysis-2210.15356</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutive-block-matching-segmentation-algorithm-with-application-to-music-structure-analysis-2210.15356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutive-block-matching-segmentation-algorithm-with-application-to-music-structure-analysis-2210.15356"/></url>
<url><loc>https://scifaro.com/en/abs/explicit-intensity-control-for-accented-text-to-speech-2210.15364</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explicit-intensity-control-for-accented-text-to-speech-2210.15364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explicit-intensity-control-for-accented-text-to-speech-2210.15364"/></url>
<url><loc>https://scifaro.com/en/abs/a-training-and-inference-strategy-using-noisy-and-enhanced-speech-as-target-for-speech-enhancement-without-clean-speech-2210.15368</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-training-and-inference-strategy-using-noisy-and-enhanced-speech-as-target-for-speech-enhancement-without-clean-speech-2210.15368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-training-and-inference-strategy-using-noisy-and-enhanced-speech-as-target-for-speech-enhancement-without-clean-speech-2210.15368"/></url>
<url><loc>https://scifaro.com/en/abs/casnet-investigating-channel-robustness-for-speech-separation-2210.15370</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/casnet-investigating-channel-robustness-for-speech-separation-2210.15370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/casnet-investigating-channel-robustness-for-speech-separation-2210.15370"/></url>
<url><loc>https://scifaro.com/en/abs/opening-the-black-box-of-wav2vec-feature-encoder-2210.15386</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/opening-the-black-box-of-wav2vec-feature-encoder-2210.15386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/opening-the-black-box-of-wav2vec-feature-encoder-2210.15386"/></url>
<url><loc>https://scifaro.com/en/abs/freevc-towards-high-quality-text-free-one-shot-voice-conversion-2210.15418</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freevc-towards-high-quality-text-free-one-shot-voice-conversion-2210.15418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freevc-towards-high-quality-text-free-one-shot-voice-conversion-2210.15418"/></url>
<url><loc>https://scifaro.com/en/abs/toroidal-probabilistic-spherical-discriminant-analysis-2210.15441</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toroidal-probabilistic-spherical-discriminant-analysis-2210.15441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toroidal-probabilistic-spherical-discriminant-analysis-2210.15441"/></url>
<url><loc>https://scifaro.com/en/abs/virtuoso-massive-multilingual-speech-text-joint-semi-supervised-learning-for-text-to-speech-2210.15447</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/virtuoso-massive-multilingual-speech-text-joint-semi-supervised-learning-for-text-to-speech-2210.15447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/virtuoso-massive-multilingual-speech-text-joint-semi-supervised-learning-for-text-to-speech-2210.15447"/></url>
<url><loc>https://scifaro.com/en/abs/source-filter-hifi-gan-fast-and-pitch-controllable-high-fidelity-neural-vocoder-2210.15533</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-filter-hifi-gan-fast-and-pitch-controllable-high-fidelity-neural-vocoder-2210.15533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-filter-hifi-gan-fast-and-pitch-controllable-high-fidelity-neural-vocoder-2210.15533"/></url>
<url><loc>https://scifaro.com/en/abs/lyricjam-sonic-a-generative-system-for-real-time-composition-and-musical-improvisation-2210.15638</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lyricjam-sonic-a-generative-system-for-real-time-composition-and-musical-improvisation-2210.15638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lyricjam-sonic-a-generative-system-for-real-time-composition-and-musical-improvisation-2210.15638"/></url>
<url><loc>https://scifaro.com/en/abs/fedaudio-a-federated-learning-benchmark-for-audio-tasks-2210.15707</loc><lastmod>2023-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fedaudio-a-federated-learning-benchmark-for-audio-tasks-2210.15707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fedaudio-a-federated-learning-benchmark-for-audio-tasks-2210.15707"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-acoustic-matching-of-audio-signals-learning-to-hear-music-in-any-room-concert-hall-2210.15750</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-acoustic-matching-of-audio-signals-learning-to-hear-music-in-any-room-concert-hall-2210.15750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-acoustic-matching-of-audio-signals-learning-to-hear-music-in-any-room-concert-hall-2210.15750"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-role-of-visual-context-in-enriching-music-representations-2210.15828</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-role-of-visual-context-in-enriching-music-representations-2210.15828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-role-of-visual-context-in-enriching-music-representations-2210.15828"/></url>
<url><loc>https://scifaro.com/en/abs/gm-tcnet-gated-multi-scale-temporal-convolutional-network-using-emotion-causality-for-speech-emotion-recognition-2210.15834</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gm-tcnet-gated-multi-scale-temporal-convolutional-network-using-emotion-causality-for-speech-emotion-recognition-2210.15834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gm-tcnet-gated-multi-scale-temporal-convolutional-network-using-emotion-causality-for-speech-emotion-recognition-2210.15834"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-speaker-representation-for-target-speaker-extraction-2210.15849</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-speaker-representation-for-target-speaker-extraction-2210.15849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-speaker-representation-for-target-speaker-extraction-2210.15849"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-intelligent-neural-homomorphic-synthesis-2210.15853</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-intelligent-neural-homomorphic-synthesis-2210.15853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-intelligent-neural-homomorphic-synthesis-2210.15853"/></url>
<url><loc>https://scifaro.com/en/abs/residual-adapters-for-few-shot-text-to-speech-speaker-adaptation-2210.15868</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-adapters-for-few-shot-text-to-speech-speaker-adaptation-2210.15868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-adapters-for-few-shot-text-to-speech-speaker-adaptation-2210.15868"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-study-on-self-supervised-distillation-for-speaker-representation-learning-2210.15936</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-study-on-self-supervised-distillation-for-speaker-representation-learning-2210.15936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-study-on-self-supervised-distillation-for-speaker-representation-learning-2210.15936"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-effects-of-channel-sparsity-on-neural-network-pruning-for-acoustic-scene-classification-2210.15960</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-effects-of-channel-sparsity-on-neural-network-pruning-for-acoustic-scene-classification-2210.15960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-effects-of-channel-sparsity-on-neural-network-pruning-for-acoustic-scene-classification-2210.15960"/></url>
<url><loc>https://scifaro.com/en/abs/spectrograms-are-sequences-of-patches-2210.15988</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrograms-are-sequences-of-patches-2210.15988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrograms-are-sequences-of-patches-2210.15988"/></url>
<url><loc>https://scifaro.com/en/abs/towards-zero-shot-text-based-voice-editing-using-acoustic-context-conditioning-utterance-embeddings-and-reference-encoders-2210.16045</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-zero-shot-text-based-voice-editing-using-acoustic-context-conditioning-utterance-embeddings-and-reference-encoders-2210.16045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-zero-shot-text-based-voice-editing-using-acoustic-context-conditioning-utterance-embeddings-and-reference-encoders-2210.16045"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-object-detection-approaches-to-signal-identification-2210.16173</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-object-detection-approaches-to-signal-identification-2210.16173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-object-detection-approaches-to-signal-identification-2210.16173"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-respiratory-sound-representations-using-metadata-and-contrastive-learning-2210.16192</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-respiratory-sound-representations-using-metadata-and-contrastive-learning-2210.16192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-respiratory-sound-representations-using-metadata-and-contrastive-learning-2210.16192"/></url>
<url><loc>https://scifaro.com/en/abs/universal-speaker-recognition-encoders-for-different-speech-segments-duration-2210.16231</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-speaker-recognition-encoders-for-different-speech-segments-duration-2210.16231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-speaker-recognition-encoders-for-different-speech-segments-duration-2210.16231"/></url>
<url><loc>https://scifaro.com/en/abs/filter-and-evolve-progressive-pseudo-label-refining-for-semi-supervised-automatic-speech-recognition-2210.16318</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filter-and-evolve-progressive-pseudo-label-refining-for-semi-supervised-automatic-speech-recognition-2210.16318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filter-and-evolve-progressive-pseudo-label-refining-for-semi-supervised-automatic-speech-recognition-2210.16318"/></url>
<url><loc>https://scifaro.com/en/abs/heartsiam-a-domain-invariant-model-for-heart-sound-classification-2210.16394</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heartsiam-a-domain-invariant-model-for-heart-sound-classification-2210.16394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heartsiam-a-domain-invariant-model-for-heart-sound-classification-2210.16394"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-visual-dynamics-using-scene-graphs-for-audio-source-separation-2210.16472</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-visual-dynamics-using-scene-graphs-for-audio-source-separation-2210.16472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-visual-dynamics-using-scene-graphs-for-audio-source-separation-2210.16472"/></url>
<url><loc>https://scifaro.com/en/abs/relating-human-perception-of-musicality-to-prediction-in-a-predictive-coding-model-2210.16587</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relating-human-perception-of-musicality-to-prediction-in-a-predictive-coding-model-2210.16587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relating-human-perception-of-musicality-to-prediction-in-a-predictive-coding-model-2210.16587"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-the-discrete-and-continuous-emotion-labels-for-speech-emotion-recognition-2210.16642</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-the-discrete-and-continuous-emotion-labels-for-speech-emotion-recognition-2210.16642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-the-discrete-and-continuous-emotion-labels-for-speech-emotion-recognition-2210.16642"/></url>
<url><loc>https://scifaro.com/en/abs/symmetric-saliency-based-adversarial-attack-to-speaker-identification-2210.16777</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symmetric-saliency-based-adversarial-attack-to-speaker-identification-2210.16777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symmetric-saliency-based-adversarial-attack-to-speaker-identification-2210.16777"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-speech-quality-aware-complex-neural-network-for-acoustic-echo-cancellation-with-supervised-contrastive-learning-2210.16791</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-speech-quality-aware-complex-neural-network-for-acoustic-echo-cancellation-with-supervised-contrastive-learning-2210.16791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-speech-quality-aware-complex-neural-network-for-acoustic-echo-cancellation-with-supervised-contrastive-learning-2210.16791"/></url>
<url><loc>https://scifaro.com/en/abs/srtnet-time-domain-speech-enhancement-via-stochastic-refinement-2210.16805</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/srtnet-time-domain-speech-enhancement-via-stochastic-refinement-2210.16805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/srtnet-time-domain-speech-enhancement-via-stochastic-refinement-2210.16805"/></url>
<url><loc>https://scifaro.com/en/abs/tt-net-dual-path-transformer-based-sound-field-translation-in-the-spherical-harmonic-domain-2210.16849</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tt-net-dual-path-transformer-based-sound-field-translation-in-the-spherical-harmonic-domain-2210.16849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tt-net-dual-path-transformer-based-sound-field-translation-in-the-spherical-harmonic-domain-2210.16849"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizer-preset-interpolation-using-transformer-auto-encoders-2210.16984</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizer-preset-interpolation-using-transformer-auto-encoders-2210.16984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizer-preset-interpolation-using-transformer-auto-encoders-2210.16984"/></url>
<url><loc>https://scifaro.com/en/abs/wespeaker-a-research-and-production-oriented-speaker-embedding-learning-toolkit-2210.17016</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wespeaker-a-research-and-production-oriented-speaker-embedding-learning-toolkit-2210.17016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wespeaker-a-research-and-production-oriented-speaker-embedding-learning-toolkit-2210.17016"/></url>
<url><loc>https://scifaro.com/en/abs/joint-pre-training-with-speech-and-bilingual-text-for-direct-speech-to-speech-translation-2210.17027</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-pre-training-with-speech-and-bilingual-text-for-direct-speech-to-speech-translation-2210.17027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-pre-training-with-speech-and-bilingual-text-for-direct-speech-to-speech-translation-2210.17027"/></url>
<url><loc>https://scifaro.com/en/abs/fusionformer-fusing-operations-in-transformer-for-efficient-streaming-speech-recognition-2210.17079</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusionformer-fusing-operations-in-transformer-for-efficient-streaming-speech-recognition-2210.17079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusionformer-fusing-operations-in-transformer-for-efficient-streaming-speech-recognition-2210.17079"/></url>
<url><loc>https://scifaro.com/en/abs/structured-state-space-decoder-for-speech-recognition-and-synthesis-2210.17098</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structured-state-space-decoder-for-speech-recognition-and-synthesis-2210.17098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structured-state-space-decoder-for-speech-recognition-and-synthesis-2210.17098"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-train-and-test-time-augmentations-for-audio-language-learning-2210.17143</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-train-and-test-time-augmentations-for-audio-language-learning-2210.17143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-train-and-test-time-augmentations-for-audio-language-learning-2210.17143"/></url>
<url><loc>https://scifaro.com/en/abs/audio-time-scale-modification-with-temporal-compressing-networks-2210.17152</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-time-scale-modification-with-temporal-compressing-networks-2210.17152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-time-scale-modification-with-temporal-compressing-networks-2210.17152"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-hierarchical-metrical-structure-modeling-2210.17183</loc><lastmod>2023-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-hierarchical-metrical-structure-modeling-2210.17183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-hierarchical-metrical-structure-modeling-2210.17183"/></url>
<url><loc>https://scifaro.com/en/abs/combining-automatic-speaker-verification-and-prosody-analysis-for-synthetic-speech-detection-2210.17222</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-automatic-speaker-verification-and-prosody-analysis-for-synthetic-speech-detection-2210.17222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-automatic-speaker-verification-and-prosody-analysis-for-synthetic-speech-detection-2210.17222"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-text-to-speech-with-flow-based-voice-conversion-for-improved-pronunciation-2210.17264</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-text-to-speech-with-flow-based-voice-conversion-for-improved-pronunciation-2210.17264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-text-to-speech-with-flow-based-voice-conversion-for-improved-pronunciation-2210.17264"/></url>
<url><loc>https://scifaro.com/en/abs/accentspeech-learning-accent-from-crowd-sourced-data-for-target-speaker-tts-with-accents-2210.17305</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accentspeech-learning-accent-from-crowd-sourced-data-for-target-speaker-tts-with-accents-2210.17305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accentspeech-learning-accent-from-crowd-sourced-data-for-target-speaker-tts-with-accents-2210.17305"/></url>
<url><loc>https://scifaro.com/en/abs/robust-melgan-a-robust-universal-neural-vocoder-for-high-fidelity-tts-2210.17349</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-melgan-a-robust-universal-neural-vocoder-for-high-fidelity-tts-2210.17349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-melgan-a-robust-universal-neural-vocoder-for-high-fidelity-tts-2210.17349"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-detection-of-singing-techniques-in-repertoires-of-j-pop-solo-singers-2210.17367</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-detection-of-singing-techniques-in-repertoires-of-j-pop-solo-singers-2210.17367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-detection-of-singing-techniques-in-repertoires-of-j-pop-solo-singers-2210.17367"/></url>
<url><loc>https://scifaro.com/en/abs/active-learning-of-non-semantic-speech-tasks-with-pretrained-models-2211.00119</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-learning-of-non-semantic-speech-tasks-with-pretrained-models-2211.00119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-learning-of-non-semantic-speech-tasks-with-pretrained-models-2211.00119"/></url>
<url><loc>https://scifaro.com/en/abs/sdmuse-stochastic-differential-music-editing-and-generation-via-hybrid-representation-2211.00222</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sdmuse-stochastic-differential-music-editing-and-generation-via-hybrid-representation-2211.00222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sdmuse-stochastic-differential-music-editing-and-generation-via-hybrid-representation-2211.00222"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-content-aware-neural-text-to-speech-mos-prediction-using-prosodic-and-linguistic-features-2211.00342</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-content-aware-neural-text-to-speech-mos-prediction-using-prosodic-and-linguistic-features-2211.00342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-content-aware-neural-text-to-speech-mos-prediction-using-prosodic-and-linguistic-features-2211.00342"/></url>
<url><loc>https://scifaro.com/en/abs/generating-multilingual-gender-ambiguous-text-to-speech-voices-2211.00375</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-multilingual-gender-ambiguous-text-to-speech-voices-2211.00375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-multilingual-gender-ambiguous-text-to-speech-voices-2211.00375"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-black-box-audio-effects-with-time-varying-feature-modulation-2211.00497</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-black-box-audio-effects-with-time-varying-feature-modulation-2211.00497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-black-box-audio-effects-with-time-varying-feature-modulation-2211.00497"/></url>
<url><loc>https://scifaro.com/en/abs/trimtail-low-latency-streaming-asr-with-simple-but-effective-spectrogram-level-length-penalty-2211.00522</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trimtail-low-latency-streaming-asr-with-simple-but-effective-spectrogram-level-length-penalty-2211.00522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trimtail-low-latency-streaming-asr-with-simple-but-effective-spectrogram-level-length-penalty-2211.00522"/></url>
<url><loc>https://scifaro.com/en/abs/learning-utterance-level-representations-through-token-level-acoustic-latents-prediction-for-expressive-speech-synthesis-2211.00523</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-utterance-level-representations-through-token-level-acoustic-latents-prediction-for-expressive-speech-synthesis-2211.00523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-utterance-level-representations-through-token-level-acoustic-latents-prediction-for-expressive-speech-synthesis-2211.00523"/></url>
<url><loc>https://scifaro.com/en/abs/magnitude-or-phase-a-two-stage-algorithm-for-dereverberation-2211.00607</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/magnitude-or-phase-a-two-stage-algorithm-for-dereverberation-2211.00607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/magnitude-or-phase-a-two-stage-algorithm-for-dereverberation-2211.00607"/></url>
<url><loc>https://scifaro.com/en/abs/unified-end-to-end-speech-recognition-and-endpointing-for-fast-and-efficient-speech-systems-2211.00786</loc><lastmod>2023-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-end-to-end-speech-recognition-and-endpointing-for-fast-and-efficient-speech-systems-2211.00786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-end-to-end-speech-recognition-and-endpointing-for-fast-and-efficient-speech-systems-2211.00786"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-annotation-modality-on-label-quality-and-model-performance-in-the-automatic-assessment-of-laughter-in-the-wild-2211.00794</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-annotation-modality-on-label-quality-and-model-performance-in-the-automatic-assessment-of-laughter-in-the-wild-2211.00794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-annotation-modality-on-label-quality-and-model-performance-in-the-automatic-assessment-of-laughter-in-the-wild-2211.00794"/></url>
<url><loc>https://scifaro.com/en/abs/build-a-sre-challenge-system-lessons-from-voxsrc-2022-and-cnsrc-2022-2211.00815</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/build-a-sre-challenge-system-lessons-from-voxsrc-2022-and-cnsrc-2022-2211.00815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/build-a-sre-challenge-system-lessons-from-voxsrc-2022-and-cnsrc-2022-2211.00815"/></url>
<url><loc>https://scifaro.com/en/abs/conversation-oriented-asr-with-multi-look-ahead-cbs-architecture-2211.00858</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conversation-oriented-asr-with-multi-look-ahead-cbs-architecture-2211.00858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conversation-oriented-asr-with-multi-look-ahead-cbs-architecture-2211.00858"/></url>
<url><loc>https://scifaro.com/en/abs/pop2piano-pop-audio-based-piano-cover-generation-2211.00895</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pop2piano-pop-audio-based-piano-cover-generation-2211.00895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pop2piano-pop-audio-based-piano-cover-generation-2211.00895"/></url>
<url><loc>https://scifaro.com/en/abs/simd-size-aware-weight-regularization-for-fast-neural-vocoding-on-cpu-2211.00898</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simd-size-aware-weight-regularization-for-fast-neural-vocoding-on-cpu-2211.00898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simd-size-aware-weight-regularization-for-fast-neural-vocoding-on-cpu-2211.00898"/></url>
<url><loc>https://scifaro.com/en/abs/speechblender-speech-augmentation-framework-for-mispronunciation-data-generation-2211.00923</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechblender-speech-augmentation-framework-for-mispronunciation-data-generation-2211.00923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechblender-speech-augmentation-framework-for-mispronunciation-data-generation-2211.00923"/></url>
<url><loc>https://scifaro.com/en/abs/fast-u2-fast-and-accurate-end-to-end-speech-recognition-in-joint-ctc-attention-frames-2211.00941</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-u2-fast-and-accurate-end-to-end-speech-recognition-in-joint-ctc-attention-frames-2211.00941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-u2-fast-and-accurate-end-to-end-speech-recognition-in-joint-ctc-attention-frames-2211.00941"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-multi-style-speech-synthesis-with-timbre-and-style-disentanglement-2211.00967</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-multi-style-speech-synthesis-with-timbre-and-style-disentanglement-2211.00967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-multi-style-speech-synthesis-with-timbre-and-style-disentanglement-2211.00967"/></url>
<url><loc>https://scifaro.com/en/abs/spectromap-peak-detection-algorithm-for-audio-fingerprinting-2211.00982</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectromap-peak-detection-algorithm-for-audio-fingerprinting-2211.00982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectromap-peak-detection-algorithm-for-audio-fingerprinting-2211.00982"/></url>
<url><loc>https://scifaro.com/en/abs/a-weighted-variance-variational-autoencoder-model-for-speech-enhancement-2211.00990</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-weighted-variance-variational-autoencoder-model-for-speech-enhancement-2211.00990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-weighted-variance-variational-autoencoder-model-for-speech-enhancement-2211.00990"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-with-vibrato-modeling-and-latent-energy-representation-2211.00996</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-with-vibrato-modeling-and-latent-energy-representation-2211.00996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-with-vibrato-modeling-and-latent-energy-representation-2211.00996"/></url>
<url><loc>https://scifaro.com/en/abs/intermediate-fine-tuning-using-imperfect-synthetic-speech-for-improving-electrolaryngeal-speech-recognition-2211.01079</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intermediate-fine-tuning-using-imperfect-synthetic-speech-for-improving-electrolaryngeal-speech-recognition-2211.01079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intermediate-fine-tuning-using-imperfect-synthetic-speech-for-improving-electrolaryngeal-speech-recognition-2211.01079"/></url>
<url><loc>https://scifaro.com/en/abs/dspgan-a-gan-based-universal-vocoder-for-high-fidelity-tts-by-time-frequency-domain-supervision-from-dsp-2211.01087</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dspgan-a-gan-based-universal-vocoder-for-high-fidelity-tts-by-time-frequency-domain-supervision-from-dsp-2211.01087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dspgan-a-gan-based-universal-vocoder-for-high-fidelity-tts-by-time-frequency-domain-supervision-from-dsp-2211.01087"/></url>
<url><loc>https://scifaro.com/en/abs/audio-language-modeling-using-perceptually-guided-discrete-representations-2211.01223</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-language-modeling-using-perceptually-guided-discrete-representations-2211.01223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-language-modeling-using-perceptually-guided-discrete-representations-2211.01223"/></url>
<url><loc>https://scifaro.com/en/abs/a-quantum-kernel-learning-approach-to-acoustic-modeling-for-spoken-command-recognition-2211.01263</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-quantum-kernel-learning-approach-to-acoustic-modeling-for-spoken-command-recognition-2211.01263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-quantum-kernel-learning-approach-to-acoustic-modeling-for-spoken-command-recognition-2211.01263"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-music-genre-classification-with-cross-modal-neural-model-reprogramming-2211.01317</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-music-genre-classification-with-cross-modal-neural-model-reprogramming-2211.01317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-music-genre-classification-with-cross-modal-neural-model-reprogramming-2211.01317"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-phoneme-level-prosody-latents-using-ar-and-flow-based-prior-networks-for-expressive-speech-synthesis-2211.01327</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-phoneme-level-prosody-latents-using-ar-and-flow-based-prior-networks-for-expressive-speech-synthesis-2211.01327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-phoneme-level-prosody-latents-using-ar-and-flow-based-prior-networks-for-expressive-speech-synthesis-2211.01327"/></url>
<url><loc>https://scifaro.com/en/abs/the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-icsrc-dataset-tracks-baseline-and-results-2211.01585</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-icsrc-dataset-tracks-baseline-and-results-2211.01585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-icsrc-dataset-tracks-baseline-and-results-2211.01585"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-autoregression-a-novel-trick-to-improve-your-low-latency-speech-enhancement-model-2211.01751</loc><lastmod>2023-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-autoregression-a-novel-trick-to-improve-your-low-latency-speech-enhancement-model-2211.01751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-autoregression-a-novel-trick-to-improve-your-low-latency-speech-enhancement-model-2211.01751"/></url>
<url><loc>https://scifaro.com/en/abs/hypersound-generating-implicit-neural-representations-of-audio-signals-with-hypernetworks-2211.01839</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hypersound-generating-implicit-neural-representations-of-audio-signals-with-hypernetworks-2211.01839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hypersound-generating-implicit-neural-representations-of-audio-signals-with-hypernetworks-2211.01839"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-kernels-and-channel-attention-for-low-resource-speaker-verification-2211.02000</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-kernels-and-channel-attention-for-low-resource-speaker-verification-2211.02000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-kernels-and-channel-attention-for-low-resource-speaker-verification-2211.02000"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-target-sound-extraction-2211.02250</loc><lastmod>2023-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-target-sound-extraction-2211.02250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-target-sound-extraction-2211.02250"/></url>
<url><loc>https://scifaro.com/en/abs/wireless-deep-speech-semantic-transmission-2211.02283</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wireless-deep-speech-semantic-transmission-2211.02283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wireless-deep-speech-semantic-transmission-2211.02283"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-rendering-of-ambisonic-signals-by-neural-networks-2211.02301</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-rendering-of-ambisonic-signals-by-neural-networks-2211.02301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-rendering-of-ambisonic-signals-by-neural-networks-2211.02301"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-prosody-of-audiobook-text-to-speech-synthesis-with-acoustic-and-textual-contexts-2211.02336</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-prosody-of-audiobook-text-to-speech-synthesis-with-acoustic-and-textual-contexts-2211.02336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-prosody-of-audiobook-text-to-speech-synthesis-with-acoustic-and-textual-contexts-2211.02336"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-vgg-cct-cross-corpus-speech-emotion-recognition-with-speaker-embedding-and-vision-transformers-2211.02366</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-vgg-cct-cross-corpus-speech-emotion-recognition-with-speaker-embedding-and-vision-transformers-2211.02366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-vgg-cct-cross-corpus-speech-emotion-recognition-with-speaker-embedding-and-vision-transformers-2211.02366"/></url>
<url><loc>https://scifaro.com/en/abs/norespeech-knowledge-distillation-based-conditional-diffusion-model-for-noise-robust-expressive-tts-2211.02448</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/norespeech-knowledge-distillation-based-conditional-diffusion-model-for-noise-robust-expressive-tts-2211.02448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/norespeech-knowledge-distillation-based-conditional-diffusion-model-for-noise-robust-expressive-tts-2211.02448"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-efficient-speech-enhancement-with-variational-autoencoders-2211.02728</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-efficient-speech-enhancement-with-variational-autoencoders-2211.02728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-efficient-speech-enhancement-with-variational-autoencoders-2211.02728"/></url>
<url><loc>https://scifaro.com/en/abs/visinger-2-high-fidelity-end-to-end-singing-voice-synthesis-enhanced-by-digital-signal-processing-synthesizer-2211.02903</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visinger-2-high-fidelity-end-to-end-singing-voice-synthesis-enhanced-by-digital-signal-processing-synthesizer-2211.02903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visinger-2-high-fidelity-end-to-end-singing-voice-synthesis-enhanced-by-digital-signal-processing-synthesizer-2211.02903"/></url>
<url><loc>https://scifaro.com/en/abs/effective-audio-classification-network-based-on-paired-inverse-pyramid-structure-and-dense-mlp-block-2211.02940</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-audio-classification-network-based-on-paired-inverse-pyramid-structure-and-dense-mlp-block-2211.02940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-audio-classification-network-based-on-paired-inverse-pyramid-structure-and-dense-mlp-block-2211.02940"/></url>
<url><loc>https://scifaro.com/en/abs/i-hear-your-true-colors-image-guided-audio-generation-2211.03089</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-hear-your-true-colors-image-guided-audio-generation-2211.03089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-hear-your-true-colors-image-guided-audio-generation-2211.03089"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-sound-audio-classification-with-the-wigner-wille-distribution-and-convolutional-neural-networks-2211.03202</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-sound-audio-classification-with-the-wigner-wille-distribution-and-convolutional-neural-networks-2211.03202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-sound-audio-classification-with-the-wigner-wille-distribution-and-convolutional-neural-networks-2211.03202"/></url>
<url><loc>https://scifaro.com/en/abs/hi-kia-a-speech-emotion-recognition-dataset-for-wake-up-words-2211.03371</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hi-kia-a-speech-emotion-recognition-dataset-for-wake-up-words-2211.03371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hi-kia-a-speech-emotion-recognition-dataset-for-wake-up-words-2211.03371"/></url>
<url><loc>https://scifaro.com/en/abs/egocentric-audio-visual-noise-suppression-2211.03643</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/egocentric-audio-visual-noise-suppression-2211.03643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/egocentric-audio-visual-noise-suppression-2211.03643"/></url>
<url><loc>https://scifaro.com/en/abs/high-resolution-embedding-extractor-for-speaker-diarisation-2211.04060</loc><lastmod>2022-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-resolution-embedding-extractor-for-speaker-diarisation-2211.04060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-resolution-embedding-extractor-for-speaker-diarisation-2211.04060"/></url>
<url><loc>https://scifaro.com/en/abs/improving-performance-of-real-time-full-band-blind-packet-loss-concealment-with-predictive-network-2211.04071</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-performance-of-real-time-full-band-blind-packet-loss-concealment-with-predictive-network-2211.04071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-performance-of-real-time-full-band-blind-packet-loss-concealment-with-predictive-network-2211.04071"/></url>
<url><loc>https://scifaro.com/en/abs/ber-balanced-error-rate-for-speaker-diarization-2211.04304</loc><lastmod>2022-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ber-balanced-error-rate-for-speaker-diarization-2211.04304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ber-balanced-error-rate-for-speaker-diarization-2211.04304"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improved-room-impulse-response-estimation-for-speech-recognition-2211.04473</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improved-room-impulse-response-estimation-for-speech-recognition-2211.04473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improved-room-impulse-response-estimation-for-speech-recognition-2211.04473"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noisy-student-training-on-non-target-domain-data-for-automatic-speech-recognition-2211.04717</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noisy-student-training-on-non-target-domain-data-for-automatic-speech-recognition-2211.04717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noisy-student-training-on-non-target-domain-data-for-automatic-speech-recognition-2211.04717"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-large-scale-audio-tagging-via-transformer-to-cnn-knowledge-distillation-2211.04772</loc><lastmod>2023-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-large-scale-audio-tagging-via-transformer-to-cnn-knowledge-distillation-2211.04772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-large-scale-audio-tagging-via-transformer-to-cnn-knowledge-distillation-2211.04772"/></url>
<url><loc>https://scifaro.com/en/abs/global-and-local-optimization-beamforming-for-broadband-sources-2211.04921</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-and-local-optimization-beamforming-for-broadband-sources-2211.04921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-and-local-optimization-beamforming-for-broadband-sources-2211.04921"/></url>
<url><loc>https://scifaro.com/en/abs/emofake-an-initial-dataset-for-emotion-fake-audio-detection-2211.05363</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emofake-an-initial-dataset-for-emotion-fake-audio-detection-2211.05363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emofake-an-initial-dataset-for-emotion-fake-audio-detection-2211.05363"/></url>
<url><loc>https://scifaro.com/en/abs/ganstrument-adversarial-instrument-sound-synthesis-with-pitch-invariant-instance-conditioning-2211.05385</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ganstrument-adversarial-instrument-sound-synthesis-with-pitch-invariant-instance-conditioning-2211.05385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ganstrument-adversarial-instrument-sound-synthesis-with-pitch-invariant-instance-conditioning-2211.05385"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-fullband-subband-cross-attention-network-2211.05432</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-fullband-subband-cross-attention-network-2211.05432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-fullband-subband-cross-attention-network-2211.05432"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-utility-balanced-voice-de-identification-using-adversarial-examples-2211.05446</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-utility-balanced-voice-de-identification-using-adversarial-examples-2211.05446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-utility-balanced-voice-de-identification-using-adversarial-examples-2211.05446"/></url>
<url><loc>https://scifaro.com/en/abs/vis2mus-exploring-multimodal-representation-mapping-for-controllable-music-generation-2211.05543</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vis2mus-exploring-multimodal-representation-mapping-for-controllable-music-generation-2211.05543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vis2mus-exploring-multimodal-representation-mapping-for-controllable-music-generation-2211.05543"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-condition-training-for-target-source-separation-2211.05927</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-condition-training-for-target-source-separation-2211.05927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-condition-training-for-target-source-separation-2211.05927"/></url>
<url><loc>https://scifaro.com/en/abs/a-gait-triaging-toolkit-for-overlapping-acoustic-events-in-indoor-environments-2211.05944</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-gait-triaging-toolkit-for-overlapping-acoustic-events-in-indoor-environments-2211.05944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-gait-triaging-toolkit-for-overlapping-acoustic-events-in-indoor-environments-2211.05944"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-pornography-recognition-using-convolutional-neural-networks-and-bag-of-refinements-2211.05983</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-pornography-recognition-using-convolutional-neural-networks-and-bag-of-refinements-2211.05983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-pornography-recognition-using-convolutional-neural-networks-and-bag-of-refinements-2211.05983"/></url>
<url><loc>https://scifaro.com/en/abs/scenefake-an-initial-dataset-and-benchmarks-for-scene-fake-audio-detection-2211.06073</loc><lastmod>2024-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scenefake-an-initial-dataset-and-benchmarks-for-scene-fake-audio-detection-2211.06073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scenefake-an-initial-dataset-and-benchmarks-for-scene-fake-audio-detection-2211.06073"/></url>
<url><loc>https://scifaro.com/en/abs/maskedspeech-context-aware-speech-synthesis-with-masking-strategy-2211.06170</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maskedspeech-context-aware-speech-synthesis-with-masking-strategy-2211.06170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maskedspeech-context-aware-speech-synthesis-with-masking-strategy-2211.06170"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-robustness-of-non-intrusive-speech-quality-model-by-adversarial-examples-2211.06508</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-robustness-of-non-intrusive-speech-quality-model-by-adversarial-examples-2211.06508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-robustness-of-non-intrusive-speech-quality-model-by-adversarial-examples-2211.06508"/></url>
<url><loc>https://scifaro.com/en/abs/low-pass-filtering-and-bandwidth-extension-for-robust-anti-spoofing-countermeasure-against-codec-variabilities-2211.06546</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-pass-filtering-and-bandwidth-extension-for-robust-anti-spoofing-countermeasure-against-codec-variabilities-2211.06546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-pass-filtering-and-bandwidth-extension-for-robust-anti-spoofing-countermeasure-against-codec-variabilities-2211.06546"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-robustness-of-distilhubert-to-unseen-noisy-conditions-via-data-augmentation-curriculum-learning-and-multi-task-enhancement-2211.06562</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-robustness-of-distilhubert-to-unseen-noisy-conditions-via-data-augmentation-curriculum-learning-and-multi-task-enhancement-2211.06562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-robustness-of-distilhubert-to-unseen-noisy-conditions-via-data-augmentation-curriculum-learning-and-multi-task-enhancement-2211.06562"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-contrastive-language-audio-pretraining-with-feature-fusion-and-keyword-to-caption-augmentation-2211.06687</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-contrastive-language-audio-pretraining-with-feature-fusion-and-keyword-to-caption-augmentation-2211.06687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-contrastive-language-audio-pretraining-with-feature-fusion-and-keyword-to-caption-augmentation-2211.06687"/></url>
<url><loc>https://scifaro.com/en/abs/autovocoder-fast-waveform-generation-from-a-learned-speech-representation-using-differentiable-digital-signal-processing-2211.06989</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autovocoder-fast-waveform-generation-from-a-learned-speech-representation-using-differentiable-digital-signal-processing-2211.06989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autovocoder-fast-waveform-generation-from-a-learned-speech-representation-using-differentiable-digital-signal-processing-2211.06989"/></url>
<url><loc>https://scifaro.com/en/abs/ym2413-mdb-a-multi-instrumental-fm-video-game-music-dataset-with-emotion-annotations-2211.07131</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ym2413-mdb-a-multi-instrumental-fm-video-game-music-dataset-with-emotion-annotations-2211.07131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ym2413-mdb-a-multi-instrumental-fm-video-game-music-dataset-with-emotion-annotations-2211.07131"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-device-and-audio-data-to-tag-music-with-user-aware-listening-contexts-2211.07250</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-device-and-audio-data-to-tag-music-with-user-aware-listening-contexts-2211.07250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-device-and-audio-data-to-tag-music-with-user-aware-listening-contexts-2211.07250"/></url>
<url><loc>https://scifaro.com/en/abs/medleyvox-an-evaluation-dataset-for-multiple-singing-voices-separation-2211.07302</loc><lastmod>2023-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/medleyvox-an-evaluation-dataset-for-multiple-singing-voices-separation-2211.07302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/medleyvox-an-evaluation-dataset-for-multiple-singing-voices-separation-2211.07302"/></url>
<url><loc>https://scifaro.com/en/abs/describing-emotions-with-acoustic-property-prompts-for-speech-emotion-recognition-2211.07737</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/describing-emotions-with-acoustic-property-prompts-for-speech-emotion-recognition-2211.07737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/describing-emotions-with-acoustic-property-prompts-for-speech-emotion-recognition-2211.07737"/></url>
<url><loc>https://scifaro.com/en/abs/music-similarity-calculation-of-individual-instrumental-sounds-using-metric-learning-2211.07863</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-similarity-calculation-of-individual-instrumental-sounds-using-metric-learning-2211.07863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-similarity-calculation-of-individual-instrumental-sounds-using-metric-learning-2211.07863"/></url>
<url><loc>https://scifaro.com/en/abs/show-me-the-instruments-musical-instrument-retrieval-from-mixture-audio-2211.07951</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/show-me-the-instruments-musical-instrument-retrieval-from-mixture-audio-2211.07951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/show-me-the-instruments-musical-instrument-retrieval-from-mixture-audio-2211.07951"/></url>
<url><loc>https://scifaro.com/en/abs/ssm-net-feature-learning-for-music-structure-analysis-using-a-self-similarity-matrix-based-loss-2211.08141</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssm-net-feature-learning-for-music-structure-analysis-using-a-self-similarity-matrix-based-loss-2211.08141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssm-net-feature-learning-for-music-structure-analysis-using-a-self-similarity-matrix-based-loss-2211.08141"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-modeling-matters-a-novel-temporal-emotional-modeling-approach-for-speech-emotion-recognition-2211.08233</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-modeling-matters-a-novel-temporal-emotional-modeling-approach-for-speech-emotion-recognition-2211.08233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-modeling-matters-a-novel-temporal-emotional-modeling-approach-for-speech-emotion-recognition-2211.08233"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-speech-emotion-recognition-with-multi-gating-mechanism-and-neural-architecture-search-2211.08237</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-speech-emotion-recognition-with-multi-gating-mechanism-and-neural-architecture-search-2211.08237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-speech-emotion-recognition-with-multi-gating-mechanism-and-neural-architecture-search-2211.08237"/></url>
<url><loc>https://scifaro.com/en/abs/online-phase-reconstruction-via-dnn-based-phase-differences-estimation-2211.08246</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-phase-reconstruction-via-dnn-based-phase-differences-estimation-2211.08246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-phase-reconstruction-via-dnn-based-phase-differences-estimation-2211.08246"/></url>
<url><loc>https://scifaro.com/en/abs/flowgrad-using-motion-for-visual-sound-source-localization-2211.08367</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowgrad-using-motion-for-visual-sound-source-localization-2211.08367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowgrad-using-motion-for-visual-sound-source-localization-2211.08367"/></url>
<url><loc>https://scifaro.com/en/abs/music-instrument-classification-reprogrammed-2211.08379</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-instrument-classification-reprogrammed-2211.08379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-instrument-classification-reprogrammed-2211.08379"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-heteroscedastic-uncertainty-in-learning-complex-spectral-mapping-for-single-channel-speech-enhancement-2211.08624</loc><lastmod>2023-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-heteroscedastic-uncertainty-in-learning-complex-spectral-mapping-for-single-channel-speech-enhancement-2211.08624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-heteroscedastic-uncertainty-in-learning-complex-spectral-mapping-for-single-channel-speech-enhancement-2211.08624"/></url>
<url><loc>https://scifaro.com/en/abs/pbsm-backdoor-attack-against-keyword-spotting-based-on-pitch-boosting-and-sound-masking-2211.08697</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pbsm-backdoor-attack-against-keyword-spotting-based-on-pitch-boosting-and-sound-masking-2211.08697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pbsm-backdoor-attack-against-keyword-spotting-based-on-pitch-boosting-and-sound-masking-2211.08697"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-detection-based-method-for-speaker-diarization-ego4d-audio-only-diarization-challenge-2022-2211.08708</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-detection-based-method-for-speaker-diarization-ego4d-audio-only-diarization-challenge-2022-2211.08708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-detection-based-method-for-speaker-diarization-ego4d-audio-only-diarization-challenge-2022-2211.08708"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-variational-autoencoder-to-improve-neural-audio-synthesis-for-polyphonic-music-sound-2211.08715</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-variational-autoencoder-to-improve-neural-audio-synthesis-for-polyphonic-music-sound-2211.08715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-variational-autoencoder-to-improve-neural-audio-synthesis-for-polyphonic-music-sound-2211.08715"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptation-for-end-to-end-speech-recognition-systems-in-noisy-environments-2211.08774</loc><lastmod>2023-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptation-for-end-to-end-speech-recognition-systems-in-noisy-environments-2211.08774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptation-for-end-to-end-speech-recognition-systems-in-noisy-environments-2211.08774"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-emotion-recognition-with-unsupervised-speaking-style-transfer-2211.08843</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-with-unsupervised-speaking-style-transfer-2211.08843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-with-unsupervised-speaking-style-transfer-2211.08843"/></url>
<url><loc>https://scifaro.com/en/abs/rapid-connectionist-speaker-adaptation-2211.08978</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rapid-connectionist-speaker-adaptation-2211.08978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rapid-connectionist-speaker-adaptation-2211.08978"/></url>
<url><loc>https://scifaro.com/en/abs/is-my-automatic-audio-captioning-system-so-bad-spider-max-a-metric-to-consider-several-caption-candidates-2211.08983</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-my-automatic-audio-captioning-system-so-bad-spider-max-a-metric-to-consider-several-caption-candidates-2211.08983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-my-automatic-audio-captioning-system-so-bad-spider-max-a-metric-to-consider-several-caption-candidates-2211.08983"/></url>
<url><loc>https://scifaro.com/en/abs/psychophysiology-aided-perceptually-fluent-speech-analysis-of-children-who-stutter-2211.09089</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psychophysiology-aided-perceptually-fluent-speech-analysis-of-children-who-stutter-2211.09089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psychophysiology-aided-perceptually-fluent-speech-analysis-of-children-who-stutter-2211.09089"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-intelligent-music-generation-systems-2211.09124</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-intelligent-music-generation-systems-2211.09124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-intelligent-music-generation-systems-2211.09124"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-mongolian-speech-synthesis-based-on-automatic-prosody-annotation-2211.09365</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-mongolian-speech-synthesis-based-on-automatic-prosody-annotation-2211.09365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-mongolian-speech-synthesis-based-on-automatic-prosody-annotation-2211.09365"/></url>
<url><loc>https://scifaro.com/en/abs/balanced-deep-cca-for-bird-vocalization-detection-2211.09376</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/balanced-deep-cca-for-bird-vocalization-detection-2211.09376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/balanced-deep-cca-for-bird-vocalization-detection-2211.09376"/></url>
<url><loc>https://scifaro.com/en/abs/token-level-speaker-change-detection-using-speaker-difference-and-speech-content-via-continuous-integrate-and-fire-2211.09381</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/token-level-speaker-change-detection-using-speaker-difference-and-speech-content-via-continuous-integrate-and-fire-2211.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/token-level-speaker-change-detection-using-speaker-difference-and-speech-content-via-continuous-integrate-and-fire-2211.09381"/></url>
<url><loc>https://scifaro.com/en/abs/commu-dataset-for-combinatorial-music-generation-2211.09385</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/commu-dataset-for-combinatorial-music-generation-2211.09385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/commu-dataset-for-combinatorial-music-generation-2211.09385"/></url>
<url><loc>https://scifaro.com/en/abs/nansy-unified-voice-synthesis-with-neural-analysis-and-synthesis-2211.09407</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nansy-unified-voice-synthesis-with-neural-analysis-and-synthesis-2211.09407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nansy-unified-voice-synthesis-with-neural-analysis-and-synthesis-2211.09407"/></url>
<url><loc>https://scifaro.com/en/abs/longfnt-long-form-speech-recognition-with-factorized-neural-transducer-2211.09412</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/longfnt-long-form-speech-recognition-with-factorized-neural-transducer-2211.09412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/longfnt-long-form-speech-recognition-with-factorized-neural-transducer-2211.09412"/></url>
<url><loc>https://scifaro.com/en/abs/back-translation-style-data-augmentation-for-mandarin-chinese-polyphone-disambiguation-2211.09495</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/back-translation-style-data-augmentation-for-mandarin-chinese-polyphone-disambiguation-2211.09495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/back-translation-style-data-augmentation-for-mandarin-chinese-polyphone-disambiguation-2211.09495"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2211.09503</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2211.09503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2211.09503"/></url>
<url><loc>https://scifaro.com/en/abs/heart-abnormality-detection-from-heart-sound-signals-using-mfcc-feature-and-dual-stream-attention-based-network-2211.09751</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heart-abnormality-detection-from-heart-sound-signals-using-mfcc-feature-and-dual-stream-attention-based-network-2211.09751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heart-abnormality-detection-from-heart-sound-signals-using-mfcc-feature-and-dual-stream-attention-based-network-2211.09751"/></url>
<url><loc>https://scifaro.com/en/abs/robust-vocal-quality-feature-embeddings-for-dysphonic-voice-detection-2211.09858</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-vocal-quality-feature-embeddings-for-dysphonic-voice-detection-2211.09858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-vocal-quality-feature-embeddings-for-dysphonic-voice-detection-2211.09858"/></url>
<url><loc>https://scifaro.com/en/abs/audio-anti-spoofing-using-a-simple-attention-module-and-joint-optimization-based-on-additive-angular-margin-loss-and-meta-learning-2211.09898</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-anti-spoofing-using-a-simple-attention-module-and-joint-optimization-based-on-additive-angular-margin-loss-and-meta-learning-2211.09898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-anti-spoofing-using-a-simple-attention-module-and-joint-optimization-based-on-additive-angular-margin-loss-and-meta-learning-2211.09898"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-domain-adaptation-for-text-independent-forensic-speaker-recognition-2211.09913</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-domain-adaptation-for-text-independent-forensic-speaker-recognition-2211.09913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-domain-adaptation-for-text-independent-forensic-speaker-recognition-2211.09913"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-overlap-aware-neural-diarization-for-multi-party-meeting-analysis-2211.10243</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-overlap-aware-neural-diarization-for-multi-party-meeting-analysis-2211.10243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-overlap-aware-neural-diarization-for-multi-party-meeting-analysis-2211.10243"/></url>
<url><loc>https://scifaro.com/en/abs/edge-editable-dance-generation-from-music-2211.10658</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edge-editable-dance-generation-from-music-2211.10658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edge-editable-dance-generation-from-music-2211.10658"/></url>
<url><loc>https://scifaro.com/en/abs/phonemic-adversarial-attack-against-audio-recognition-in-real-world-2211.10661</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonemic-adversarial-attack-against-audio-recognition-in-real-world-2211.10661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonemic-adversarial-attack-against-audio-recognition-in-real-world-2211.10661"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-regularization-for-multimodal-emotion-recognition-using-audio-and-text-2211.10885</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-regularization-for-multimodal-emotion-recognition-using-audio-and-text-2211.10885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-regularization-for-multimodal-emotion-recognition-using-audio-and-text-2211.10885"/></url>
<url><loc>https://scifaro.com/en/abs/la-voce-low-snr-audio-visual-speech-enhancement-using-neural-vocoders-2211.10999</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/la-voce-low-snr-audio-visual-speech-enhancement-using-neural-vocoders-2211.10999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/la-voce-low-snr-audio-visual-speech-enhancement-using-neural-vocoders-2211.10999"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-efficacy-of-pre-trained-checkpoints-in-text-to-music-generation-task-2211.11216</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-efficacy-of-pre-trained-checkpoints-in-text-to-music-generation-task-2211.11216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-efficacy-of-pre-trained-checkpoints-in-text-to-music-generation-task-2211.11216"/></url>
<url><loc>https://scifaro.com/en/abs/timbreclip-connecting-timbre-to-text-and-images-2211.11225</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbreclip-connecting-timbre-to-text-and-images-2211.11225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbreclip-connecting-timbre-to-text-and-images-2211.11225"/></url>
<url><loc>https://scifaro.com/en/abs/sscformer-push-the-limit-of-chunk-wise-conformer-for-streaming-asr-using-sequentially-sampled-chunks-and-chunked-causal-convolution-2211.11419</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sscformer-push-the-limit-of-chunk-wise-conformer-for-streaming-asr-using-sequentially-sampled-chunks-and-chunked-causal-convolution-2211.11419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sscformer-push-the-limit-of-chunk-wise-conformer-for-streaming-asr-using-sequentially-sampled-chunks-and-chunked-causal-convolution-2211.11419"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-for-greek-traditional-and-folk-music-lyra-2211.11479</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-for-greek-traditional-and-folk-music-lyra-2211.11479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-for-greek-traditional-and-folk-music-lyra-2211.11479"/></url>
<url><loc>https://scifaro.com/en/abs/latent-iterative-refinement-for-modular-source-separation-2211.11917</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-iterative-refinement-for-modular-source-separation-2211.11917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-iterative-refinement-for-modular-source-separation-2211.11917"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-feature-learning-for-real-time-neural-speech-coding-2211.11960</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-feature-learning-for-real-time-neural-speech-coding-2211.11960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-feature-learning-for-real-time-neural-speech-coding-2211.11960"/></url>
<url><loc>https://scifaro.com/en/abs/taylorbeamixer-learning-taylor-inspired-all-neural-multi-channel-speech-enhancement-from-beam-space-dictionary-perspective-2211.12024</loc><lastmod>2022-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taylorbeamixer-learning-taylor-inspired-all-neural-multi-channel-speech-enhancement-from-beam-space-dictionary-perspective-2211.12024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taylorbeamixer-learning-taylor-inspired-all-neural-multi-channel-speech-enhancement-from-beam-space-dictionary-perspective-2211.12024"/></url>
<url><loc>https://scifaro.com/en/abs/robust-training-for-speaker-verification-against-noisy-labels-2211.12080</loc><lastmod>2026-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-training-for-speaker-verification-against-noisy-labels-2211.12080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-training-for-speaker-verification-against-noisy-labels-2211.12080"/></url>
<url><loc>https://scifaro.com/en/abs/aero-audio-super-resolution-in-the-spectral-domain-2211.12232</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aero-audio-super-resolution-in-the-spectral-domain-2211.12232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aero-audio-super-resolution-in-the-spectral-domain-2211.12232"/></url>
<url><loc>https://scifaro.com/en/abs/tf-gridnet-integrating-full-and-sub-band-modeling-for-speech-separation-2211.12433</loc><lastmod>2023-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tf-gridnet-integrating-full-and-sub-band-modeling-for-speech-separation-2211.12433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tf-gridnet-integrating-full-and-sub-band-modeling-for-speech-separation-2211.12433"/></url>
<url><loc>https://scifaro.com/en/abs/imasc-icfoss-malayalam-speech-corpus-2211.12796</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imasc-icfoss-malayalam-speech-corpus-2211.12796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imasc-icfoss-malayalam-speech-corpus-2211.12796"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-typicality-of-musical-sequences-2211.13016</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-typicality-of-musical-sequences-2211.13016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-typicality-of-musical-sequences-2211.13016"/></url>
<url><loc>https://scifaro.com/en/abs/asit-local-global-audio-spectrogram-vision-transformer-for-event-classification-2211.13189</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asit-local-global-audio-spectrogram-vision-transformer-for-event-classification-2211.13189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asit-local-global-audio-spectrogram-vision-transformer-for-event-classification-2211.13189"/></url>
<url><loc>https://scifaro.com/en/abs/voice-preserving-zero-shot-multiple-accent-conversion-2211.13282</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-preserving-zero-shot-multiple-accent-conversion-2211.13282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-preserving-zero-shot-multiple-accent-conversion-2211.13282"/></url>
<url><loc>https://scifaro.com/en/abs/tessp-text-enhanced-self-supervised-speech-pre-training-2211.13443</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tessp-text-enhanced-self-supervised-speech-pre-training-2211.13443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tessp-text-enhanced-self-supervised-speech-pre-training-2211.13443"/></url>
<url><loc>https://scifaro.com/en/abs/can-knowledge-of-end-to-end-text-to-speech-models-improve-neural-midi-to-audio-synthesis-systems-2211.13868</loc><lastmod>2023-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-knowledge-of-end-to-end-text-to-speech-models-improve-neural-midi-to-audio-synthesis-systems-2211.13868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-knowledge-of-end-to-end-text-to-speech-models-improve-neural-midi-to-audio-synthesis-systems-2211.13868"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-incremental-text-to-speech-on-gpus-2211.13939</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-incremental-text-to-speech-on-gpus-2211.13939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-incremental-text-to-speech-on-gpus-2211.13939"/></url>
<url><loc>https://scifaro.com/en/abs/learning-general-audio-representations-with-large-scale-training-of-patchout-audio-transformers-2211.13956</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-general-audio-representations-with-large-scale-training-of-patchout-audio-transformers-2211.13956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-general-audio-representations-with-large-scale-training-of-patchout-audio-transformers-2211.13956"/></url>
<url><loc>https://scifaro.com/en/abs/puffin-pitch-synchronous-neural-waveform-generation-for-fullband-speech-on-modest-devices-2211.14130</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/puffin-pitch-synchronous-neural-waveform-generation-for-fullband-speech-on-modest-devices-2211.14130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/puffin-pitch-synchronous-neural-waveform-generation-for-fullband-speech-on-modest-devices-2211.14130"/></url>
<url><loc>https://scifaro.com/en/abs/automated-detection-of-dolphin-whistles-with-convolutional-networks-and-transfer-learning-2211.15406</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-detection-of-dolphin-whistles-with-convolutional-networks-and-transfer-learning-2211.15406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-detection-of-dolphin-whistles-with-convolutional-networks-and-transfer-learning-2211.15406"/></url>
<url><loc>https://scifaro.com/en/abs/musfa-improving-music-structural-function-analysis-with-partially-labeled-data-2211.15787</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musfa-improving-music-structural-function-analysis-with-partially-labeled-data-2211.15787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musfa-improving-music-structural-function-analysis-with-partially-labeled-data-2211.15787"/></url>
<url><loc>https://scifaro.com/en/abs/ok-computer-analysis-an-audio-corpus-study-of-radiohead-2211.15834</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ok-computer-analysis-an-audio-corpus-study-of-radiohead-2211.15834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ok-computer-analysis-an-audio-corpus-study-of-radiohead-2211.15834"/></url>
<url><loc>https://scifaro.com/en/abs/neural-vocoder-feature-estimation-for-dry-singing-voice-separation-2211.15948</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-vocoder-feature-estimation-for-dry-singing-voice-separation-2211.15948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-vocoder-feature-estimation-for-dry-singing-voice-separation-2211.15948"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-2211.15974</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-2211.15974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-2211.15974"/></url>
<url><loc>https://scifaro.com/en/abs/model-extraction-attack-against-self-supervised-speech-models-2211.16044</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-extraction-attack-against-self-supervised-speech-models-2211.16044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-extraction-attack-against-self-supervised-speech-models-2211.16044"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-speech-synthesis-by-learning-discrete-phoneme-level-prosodic-representations-2211.16307</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-speech-synthesis-by-learning-discrete-phoneme-level-prosodic-representations-2211.16307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-speech-synthesis-by-learning-discrete-phoneme-level-prosodic-representations-2211.16307"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-unfolding-speech-enhancement-method-motivated-by-taylor-s-theorem-2211.16764</loc><lastmod>2023-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-unfolding-speech-enhancement-method-motivated-by-taylor-s-theorem-2211.16764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-unfolding-speech-enhancement-method-motivated-by-taylor-s-theorem-2211.16764"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-virtually-train-your-speaker-localizer-2211.16958</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-virtually-train-your-speaker-localizer-2211.16958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-virtually-train-your-speaker-localizer-2211.16958"/></url>
<url><loc>https://scifaro.com/en/abs/topological-data-analysis-for-speech-processing-2211.17223</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topological-data-analysis-for-speech-processing-2211.17223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topological-data-analysis-for-speech-processing-2211.17223"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-techniques-for-monaural-speech-enhancement-state-of-the-art-analysis-2212.00369</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-techniques-for-monaural-speech-enhancement-state-of-the-art-analysis-2212.00369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-techniques-for-monaural-speech-enhancement-state-of-the-art-analysis-2212.00369"/></url>
<url><loc>https://scifaro.com/en/abs/a-domain-knowledge-inspired-music-embedding-space-and-a-novel-attention-mechanism-for-symbolic-music-modeling-2212.00973</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-domain-knowledge-inspired-music-embedding-space-and-a-novel-attention-mechanism-for-symbolic-music-modeling-2212.00973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-domain-knowledge-inspired-music-embedding-space-and-a-novel-attention-mechanism-for-symbolic-music-modeling-2212.00973"/></url>
<url><loc>https://scifaro.com/en/abs/sonus-texere-automated-dense-soundtrack-construction-for-books-using-movie-adaptations-2212.01033</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonus-texere-automated-dense-soundtrack-construction-for-books-using-movie-adaptations-2212.01033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonus-texere-automated-dense-soundtrack-construction-for-books-using-movie-adaptations-2212.01033"/></url>
<url><loc>https://scifaro.com/en/abs/accear-accelerometer-acoustic-eavesdropping-with-unconstrained-vocabulary-2212.01042</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accear-accelerometer-acoustic-eavesdropping-with-unconstrained-vocabulary-2212.01042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accear-accelerometer-acoustic-eavesdropping-with-unconstrained-vocabulary-2212.01042"/></url>
<url><loc>https://scifaro.com/en/abs/neal-an-open-source-tool-for-audio-annotation-2212.01457</loc><lastmod>2022-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neal-an-open-source-tool-for-audio-annotation-2212.01457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neal-an-open-source-tool-for-audio-annotation-2212.01457"/></url>
<url><loc>https://scifaro.com/en/abs/unisyn-an-end-to-end-unified-model-for-text-to-speech-and-singing-voice-synthesis-2212.01546</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unisyn-an-end-to-end-unified-model-for-text-to-speech-and-singing-voice-synthesis-2212.01546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unisyn-an-end-to-end-unified-model-for-text-to-speech-and-singing-voice-synthesis-2212.01546"/></url>
<url><loc>https://scifaro.com/en/abs/generative-models-for-improved-naturalness-intelligibility-and-voicing-of-whispered-speech-2212.01775</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-models-for-improved-naturalness-intelligibility-and-voicing-of-whispered-speech-2212.01775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-models-for-improved-naturalness-intelligibility-and-voicing-of-whispered-speech-2212.01775"/></url>
<url><loc>https://scifaro.com/en/abs/melody-transcription-via-generative-pre-training-2212.01884</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-transcription-via-generative-pre-training-2212.01884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-transcription-via-generative-pre-training-2212.01884"/></url>
<url><loc>https://scifaro.com/en/abs/speech-mos-multi-task-learning-and-rater-bias-correction-2212.01911</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-mos-multi-task-learning-and-rater-bias-correction-2212.01911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-mos-multi-task-learning-and-rater-bias-correction-2212.01911"/></url>
<url><loc>https://scifaro.com/en/abs/nbc2-multichannel-speech-separation-with-revised-narrow-band-conformer-2212.02076</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nbc2-multichannel-speech-separation-with-revised-narrow-band-conformer-2212.02076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nbc2-multichannel-speech-separation-with-revised-narrow-band-conformer-2212.02076"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-recording-device-identification-based-on-deep-representation-learning-2212.02084</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-recording-device-identification-based-on-deep-representation-learning-2212.02084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-recording-device-identification-based-on-deep-representation-learning-2212.02084"/></url>
<url><loc>https://scifaro.com/en/abs/dear-a-deep-learning-based-audio-re-recording-resilient-watermarking-2212.02339</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dear-a-deep-learning-based-audio-re-recording-resilient-watermarking-2212.02339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dear-a-deep-learning-based-audio-re-recording-resilient-watermarking-2212.02339"/></url>
<url><loc>https://scifaro.com/en/abs/map-music2vec-a-simple-and-effective-baseline-for-self-supervised-music-audio-representation-learning-2212.02508</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/map-music2vec-a-simple-and-effective-baseline-for-self-supervised-music-audio-representation-learning-2212.02508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/map-music2vec-a-simple-and-effective-baseline-for-self-supervised-music-audio-representation-learning-2212.02508"/></url>
<url><loc>https://scifaro.com/en/abs/audio-latent-space-cartography-2212.02610</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-latent-space-cartography-2212.02610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-latent-space-cartography-2212.02610"/></url>
<url><loc>https://scifaro.com/en/abs/covariance-regularization-for-probabilistic-linear-discriminant-analysis-2212.03039</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covariance-regularization-for-probabilistic-linear-discriminant-analysis-2212.03039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covariance-regularization-for-probabilistic-linear-discriminant-analysis-2212.03039"/></url>
<url><loc>https://scifaro.com/en/abs/label-free-knowledge-distillation-with-contrastive-loss-for-light-weight-speaker-recognition-2212.03090</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-free-knowledge-distillation-with-contrastive-loss-for-light-weight-speaker-recognition-2212.03090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-free-knowledge-distillation-with-contrastive-loss-for-light-weight-speaker-recognition-2212.03090"/></url>
<url><loc>https://scifaro.com/en/abs/improve-bilingual-tts-using-dynamic-language-and-phonology-embedding-2212.03435</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improve-bilingual-tts-using-dynamic-language-and-phonology-embedding-2212.03435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improve-bilingual-tts-using-dynamic-language-and-phonology-embedding-2212.03435"/></url>
<url><loc>https://scifaro.com/en/abs/variational-speech-waveform-compression-to-catalyze-semantic-communications-2212.05294</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-speech-waveform-compression-to-catalyze-semantic-communications-2212.05294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-speech-waveform-compression-to-catalyze-semantic-communications-2212.05294"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-modality-specific-representations-for-audio-visual-speech-recognition-via-reinforcement-learning-2212.05301</loc><lastmod>2023-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-modality-specific-representations-for-audio-visual-speech-recognition-via-reinforcement-learning-2212.05301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-modality-specific-representations-for-audio-visual-speech-recognition-via-reinforcement-learning-2212.05301"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-audio-preprocessing-techniques-and-deep-learning-algorithms-for-raga-recognition-2212.05335</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-audio-preprocessing-techniques-and-deep-learning-algorithms-for-raga-recognition-2212.05335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-audio-preprocessing-techniques-and-deep-learning-algorithms-for-raga-recognition-2212.05335"/></url>
<url><loc>https://scifaro.com/en/abs/towards-trustworthy-phoneme-boundary-detection-with-autoregressive-model-and-improved-evaluation-metric-2212.06387</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-trustworthy-phoneme-boundary-detection-with-autoregressive-model-and-improved-evaluation-metric-2212.06387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-trustworthy-phoneme-boundary-detection-with-autoregressive-model-and-improved-evaluation-metric-2212.06387"/></url>
<url><loc>https://scifaro.com/en/abs/style-label-free-cross-speaker-style-transfer-by-quantized-vae-and-speaker-wise-normalization-in-speech-synthesis-2212.06397</loc><lastmod>2022-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/style-label-free-cross-speaker-style-transfer-by-quantized-vae-and-speaker-wise-normalization-in-speech-synthesis-2212.06397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/style-label-free-cross-speaker-style-transfer-by-quantized-vae-and-speaker-wise-normalization-in-speech-synthesis-2212.06397"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-prosody-representations-with-unsupervised-speech-reconstruction-2212.06972</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-prosody-representations-with-unsupervised-speech-reconstruction-2212.06972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-prosody-representations-with-unsupervised-speech-reconstruction-2212.06972"/></url>
<url><loc>https://scifaro.com/en/abs/clipsep-learning-text-queried-sound-separation-with-noisy-unlabeled-videos-2212.07065</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clipsep-learning-text-queried-sound-separation-with-noisy-unlabeled-videos-2212.07065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clipsep-learning-text-queried-sound-separation-with-noisy-unlabeled-videos-2212.07065"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-feature-fusion-transformer-network-for-end-to-end-single-channel-speech-separation-2212.07163</loc><lastmod>2022-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-feature-fusion-transformer-network-for-end-to-end-single-channel-speech-separation-2212.07163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-feature-fusion-transformer-network-for-end-to-end-single-channel-speech-separation-2212.07163"/></url>
<url><loc>https://scifaro.com/en/abs/a-large-scale-and-pcr-referenced-vocal-audio-dataset-for-covid-19-2212.07738</loc><lastmod>2023-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-large-scale-and-pcr-referenced-vocal-audio-dataset-for-covid-19-2212.07738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-large-scale-and-pcr-referenced-vocal-audio-dataset-for-covid-19-2212.07738"/></url>
<url><loc>https://scifaro.com/en/abs/towards-unified-all-neural-beamforming-for-time-and-frequency-domain-speech-separation-2212.08348</loc><lastmod>2022-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-unified-all-neural-beamforming-for-time-and-frequency-domain-speech-separation-2212.08348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-unified-all-neural-beamforming-for-time-and-frequency-domain-speech-separation-2212.08348"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-ai-classifiers-show-no-evidence-of-improved-covid-19-screening-over-simple-symptoms-checkers-2212.08570</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-ai-classifiers-show-no-evidence-of-improved-covid-19-screening-over-simple-symptoms-checkers-2212.08570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-ai-classifiers-show-no-evidence-of-improved-covid-19-screening-over-simple-symptoms-checkers-2212.08570"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-design-and-analysis-for-robust-machine-learning-a-case-study-from-covid-19-2212.08571</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-design-and-analysis-for-robust-machine-learning-a-case-study-from-covid-19-2212.08571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-design-and-analysis-for-robust-machine-learning-a-case-study-from-covid-19-2212.08571"/></url>
<url><loc>https://scifaro.com/en/abs/source-tracing-detecting-voice-spoofing-2212.08601</loc><lastmod>2022-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-tracing-detecting-voice-spoofing-2212.08601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-tracing-detecting-voice-spoofing-2212.08601"/></url>
<url><loc>https://scifaro.com/en/abs/learning-from-taxonomy-multi-label-few-shot-classification-for-everyday-sound-recognition-2212.08952</loc><lastmod>2022-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-from-taxonomy-multi-label-few-shot-classification-for-everyday-sound-recognition-2212.08952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-from-taxonomy-multi-label-few-shot-classification-for-everyday-sound-recognition-2212.08952"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-speech-centric-trustworthy-machine-learning-privacy-safety-and-fairness-2212.09006</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-speech-centric-trustworthy-machine-learning-privacy-safety-and-fairness-2212.09006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-speech-centric-trustworthy-machine-learning-privacy-safety-and-fairness-2212.09006"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-workplace-behaviors-through-speaking-patterns-using-large-scale-multimodal-wearable-recordings-a-study-of-healthcare-providers-2212.09090</loc><lastmod>2022-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-workplace-behaviors-through-speaking-patterns-using-large-scale-multimodal-wearable-recordings-a-study-of-healthcare-providers-2212.09090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-workplace-behaviors-through-speaking-patterns-using-large-scale-multimodal-wearable-recordings-a-study-of-healthcare-providers-2212.09090"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-style-conversion-in-the-waveform-domain-using-discrete-self-supervised-units-2212.09730</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-style-conversion-in-the-waveform-domain-using-discrete-self-supervised-units-2212.09730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-style-conversion-in-the-waveform-domain-using-discrete-self-supervised-units-2212.09730"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-effective-fusion-algorithms-for-speech-based-self-supervised-learning-models-2212.10092</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-effective-fusion-algorithms-for-speech-based-self-supervised-learning-models-2212.10092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-effective-fusion-algorithms-for-speech-based-self-supervised-learning-models-2212.10092"/></url>
<url><loc>https://scifaro.com/en/abs/visual-transformers-for-primates-classification-and-covid-detection-2212.10093</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-transformers-for-primates-classification-and-covid-detection-2212.10093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-transformers-for-primates-classification-and-covid-detection-2212.10093"/></url>
<url><loc>https://scifaro.com/en/abs/vsvc-backdoor-attack-against-keyword-spotting-based-on-voiceprint-selection-and-voice-conversion-2212.10103</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vsvc-backdoor-attack-against-keyword-spotting-based-on-voiceprint-selection-and-voice-conversion-2212.10103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vsvc-backdoor-attack-against-keyword-spotting-based-on-voiceprint-selection-and-voice-conversion-2212.10103"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-selectable-end-to-end-text-based-speech-editing-2212.10191</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-selectable-end-to-end-text-based-speech-editing-2212.10191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-selectable-end-to-end-text-based-speech-editing-2212.10191"/></url>
<url><loc>https://scifaro.com/en/abs/hopf-physical-reservoir-computer-for-reconfigurable-sound-recognition-2212.10370</loc><lastmod>2023-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hopf-physical-reservoir-computer-for-reconfigurable-sound-recognition-2212.10370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hopf-physical-reservoir-computer-for-reconfigurable-sound-recognition-2212.10370"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-visual-speech-separation-model-inspired-by-cortico-thalamo-cortical-circuits-2212.10744</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-visual-speech-separation-model-inspired-by-cortico-thalamo-cortical-circuits-2212.10744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-visual-speech-separation-model-inspired-by-cortico-thalamo-cortical-circuits-2212.10744"/></url>
<url><loc>https://scifaro.com/en/abs/4d-asr-joint-modeling-of-ctc-attention-transducer-and-mask-predict-decoders-2212.10818</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/4d-asr-joint-modeling-of-ctc-attention-transducer-and-mask-predict-decoders-2212.10818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/4d-asr-joint-modeling-of-ctc-attention-transducer-and-mask-predict-decoders-2212.10818"/></url>
<url><loc>https://scifaro.com/en/abs/alcap-alignment-augmented-music-captioner-2212.10901</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alcap-alignment-augmented-music-captioner-2212.10901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alcap-alignment-augmented-music-captioner-2212.10901"/></url>
<url><loc>https://scifaro.com/en/abs/polytopic-analysis-of-music-2212.11054</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polytopic-analysis-of-music-2212.11054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polytopic-analysis-of-music-2212.11054"/></url>
<url><loc>https://scifaro.com/en/abs/generating-music-with-sentiment-using-transformer-gans-2212.11134</loc><lastmod>2022-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-music-with-sentiment-using-transformer-gans-2212.11134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-music-with-sentiment-using-transformer-gans-2212.11134"/></url>
<url><loc>https://scifaro.com/en/abs/audio-denoising-for-robust-audio-fingerprinting-2212.11277</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-denoising-for-robust-audio-fingerprinting-2212.11277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-denoising-for-robust-audio-fingerprinting-2212.11277"/></url>
<url><loc>https://scifaro.com/en/abs/earspy-spying-caller-speech-and-identity-through-tiny-vibrations-of-smartphone-ear-speakers-2212.12151</loc><lastmod>2022-12-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/earspy-spying-caller-speech-and-identity-through-tiny-vibrations-of-smartphone-ear-speakers-2212.12151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/earspy-spying-caller-speech-and-identity-through-tiny-vibrations-of-smartphone-ear-speakers-2212.12151"/></url>
<url><loc>https://scifaro.com/en/abs/feature-selection-approaches-for-optimising-music-emotion-recognition-methods-2212.13369</loc><lastmod>2022-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-selection-approaches-for-optimising-music-emotion-recognition-methods-2212.13369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-selection-approaches-for-optimising-music-emotion-recognition-methods-2212.13369"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-with-limited-data-and-limitless-data-augmentations-2212.13581</loc><lastmod>2022-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-with-limited-data-and-limitless-data-augmentations-2212.13581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-with-limited-data-and-limitless-data-augmentations-2212.13581"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-deep-learning-system-for-depression-and-anxiety-detection-2212.14490</loc><lastmod>2023-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-deep-learning-system-for-depression-and-anxiety-detection-2212.14490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-deep-learning-system-for-depression-and-anxiety-detection-2212.14490"/></url>
<url><loc>https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-audio-deepfake-detection-2212.14597</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-audio-deepfake-detection-2212.14597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-audio-deepfake-detection-2212.14597"/></url>
<url><loc>https://scifaro.com/en/abs/blind-restoration-of-real-world-audio-by-1d-operational-gans-2212.14618</loc><lastmod>2023-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-restoration-of-real-world-audio-by-1d-operational-gans-2212.14618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-restoration-of-real-world-audio-by-1d-operational-gans-2212.14618"/></url>
<url><loc>https://scifaro.com/en/abs/emogator-a-new-open-source-vocal-burst-dataset-with-baseline-machine-learning-classification-methodologies-2301.00508</loc><lastmod>2023-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emogator-a-new-open-source-vocal-burst-dataset-with-baseline-machine-learning-classification-methodologies-2301.00508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emogator-a-new-open-source-vocal-burst-dataset-with-baseline-machine-learning-classification-methodologies-2301.00508"/></url>
<url><loc>https://scifaro.com/en/abs/language-models-are-drummers-drum-composition-with-natural-language-pre-training-2301.01162</loc><lastmod>2023-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-models-are-drummers-drum-composition-with-natural-language-pre-training-2301.01162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-models-are-drummers-drum-composition-with-natural-language-pre-training-2301.01162"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-based-framework-for-mispronunciation-detection-of-arabic-phonemes-2301.01378</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-based-framework-for-mispronunciation-detection-of-arabic-phonemes-2301.01378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-based-framework-for-mispronunciation-detection-of-arabic-phonemes-2301.01378"/></url>
<url><loc>https://scifaro.com/en/abs/validity-in-music-information-research-experiments-2301.01578</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/validity-in-music-information-research-experiments-2301.01578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/validity-in-music-information-research-experiments-2301.01578"/></url>
<url><loc>https://scifaro.com/en/abs/multi-genre-music-transformer-composing-full-length-musical-piece-2301.02385</loc><lastmod>2023-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-genre-music-transformer-composing-full-length-musical-piece-2301.02385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-genre-music-transformer-composing-full-length-musical-piece-2301.02385"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-lyrics-rhythm-matching-2301.02732</loc><lastmod>2025-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-lyrics-rhythm-matching-2301.02732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-lyrics-rhythm-matching-2301.02732"/></url>
<url><loc>https://scifaro.com/en/abs/tunesformer-forming-irish-tunes-with-control-codes-by-bar-patching-2301.02884</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tunesformer-forming-irish-tunes-with-control-codes-by-bar-patching-2301.02884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tunesformer-forming-irish-tunes-with-control-codes-by-bar-patching-2301.02884"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-neural-physical-sound-matching-2301.02886</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-neural-physical-sound-matching-2301.02886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-neural-physical-sound-matching-2301.02886"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-model-inversion-attacks-on-automatic-speaker-recognition-2301.03206</loc><lastmod>2023-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-model-inversion-attacks-on-automatic-speaker-recognition-2301.03206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-model-inversion-attacks-on-automatic-speaker-recognition-2301.03206"/></url>
<url><loc>https://scifaro.com/en/abs/generative-emotional-ai-for-speech-emotion-recognition-the-case-for-synthetic-emotional-speech-augmentation-2301.03751</loc><lastmod>2023-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-emotional-ai-for-speech-emotion-recognition-the-case-for-synthetic-emotional-speech-augmentation-2301.03751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-emotional-ai-for-speech-emotion-recognition-the-case-for-synthetic-emotional-speech-augmentation-2301.03751"/></url>
<url><loc>https://scifaro.com/en/abs/unifyspeech-a-unified-framework-for-zero-shot-text-to-speech-and-voice-conversion-2301.03801</loc><lastmod>2023-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifyspeech-a-unified-framework-for-zero-shot-text-to-speech-and-voice-conversion-2301.03801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifyspeech-a-unified-framework-for-zero-shot-text-to-speech-and-voice-conversion-2301.03801"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-complex-valued-deep-neural-networks-for-monaural-speech-enhancement-2301.04320</loc><lastmod>2023-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-complex-valued-deep-neural-networks-for-monaural-speech-enhancement-2301.04320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-complex-valued-deep-neural-networks-for-monaural-speech-enhancement-2301.04320"/></url>
<url><loc>https://scifaro.com/en/abs/perceive-and-predict-self-supervised-speech-representation-based-loss-functions-for-speech-enhancement-2301.04388</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceive-and-predict-self-supervised-speech-representation-based-loss-functions-for-speech-enhancement-2301.04388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceive-and-predict-self-supervised-speech-representation-based-loss-functions-for-speech-enhancement-2301.04388"/></url>
<url><loc>https://scifaro.com/en/abs/wuyun-exploring-hierarchical-skeleton-guided-melody-generation-using-knowledge-enhanced-deep-learning-2301.04488</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wuyun-exploring-hierarchical-skeleton-guided-melody-generation-using-knowledge-enhanced-deep-learning-2301.04488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wuyun-exploring-hierarchical-skeleton-guided-melody-generation-using-knowledge-enhanced-deep-learning-2301.04488"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-correlates-of-the-syllabic-rhythm-of-speech-modulation-spectrum-or-local-features-of-the-temporal-envelope-2301.05898</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-correlates-of-the-syllabic-rhythm-of-speech-modulation-spectrum-or-local-features-of-the-temporal-envelope-2301.05898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-correlates-of-the-syllabic-rhythm-of-speech-modulation-spectrum-or-local-features-of-the-temporal-envelope-2301.05898"/></url>
<url><loc>https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-symbolic-homophony-music-scores-2301.05908</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-symbolic-homophony-music-scores-2301.05908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-symbolic-homophony-music-scores-2301.05908"/></url>
<url><loc>https://scifaro.com/en/abs/training-one-model-to-detect-heart-and-lung-sound-events-from-single-point-auscultations-2301.06078</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-one-model-to-detect-heart-and-lung-sound-events-from-single-point-auscultations-2301.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-one-model-to-detect-heart-and-lung-sound-events-from-single-point-auscultations-2301.06078"/></url>
<url><loc>https://scifaro.com/en/abs/what-artificial-intelligence-might-teach-us-about-the-origin-of-human-language-2301.06211</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-artificial-intelligence-might-teach-us-about-the-origin-of-human-language-2301.06211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-artificial-intelligence-might-teach-us-about-the-origin-of-human-language-2301.06211"/></url>
<url><loc>https://scifaro.com/en/abs/improving-target-speaker-extraction-with-sparse-lda-transformed-speaker-embeddings-2301.06277</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-target-speaker-extraction-with-sparse-lda-transformed-speaker-embeddings-2301.06277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-target-speaker-extraction-with-sparse-lda-transformed-speaker-embeddings-2301.06277"/></url>
<url><loc>https://scifaro.com/en/abs/msanii-high-fidelity-music-synthesis-on-a-shoestring-budget-2301.06468</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msanii-high-fidelity-music-synthesis-on-a-shoestring-budget-2301.06468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msanii-high-fidelity-music-synthesis-on-a-shoestring-budget-2301.06468"/></url>
<url><loc>https://scifaro.com/en/abs/the-newsbridge-telecom-sudparis-voxceleb-speaker-recognition-challenge-2022-system-description-2301.07491</loc><lastmod>2023-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-newsbridge-telecom-sudparis-voxceleb-speaker-recognition-challenge-2022-system-description-2301.07491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-newsbridge-telecom-sudparis-voxceleb-speaker-recognition-challenge-2022-system-description-2301.07491"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-the-reconstruction-capacity-of-stacked-convolutional-autoencoders-for-log-mel-spectrograms-2301.07665</loc><lastmod>2023-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-the-reconstruction-capacity-of-stacked-convolutional-autoencoders-for-log-mel-spectrograms-2301.07665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-the-reconstruction-capacity-of-stacked-convolutional-autoencoders-for-log-mel-spectrograms-2301.07665"/></url>
<url><loc>https://scifaro.com/en/abs/from-english-to-more-languages-parameter-efficient-model-reprogramming-for-cross-lingual-speech-recognition-2301.07851</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-english-to-more-languages-parameter-efficient-model-reprogramming-for-cross-lingual-speech-recognition-2301.07851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-english-to-more-languages-parameter-efficient-model-reprogramming-for-cross-lingual-speech-recognition-2301.07851"/></url>
<url><loc>https://scifaro.com/en/abs/thlnet-two-stage-heterogeneous-lightweight-network-for-monaural-speech-enhancement-2301.07939</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thlnet-two-stage-heterogeneous-lightweight-network-for-monaural-speech-enhancement-2301.07939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thlnet-two-stage-heterogeneous-lightweight-network-for-monaural-speech-enhancement-2301.07939"/></url>
<url><loc>https://scifaro.com/en/abs/spothitpy-a-study-for-ml-based-song-hit-prediction-using-spotify-2301.07978</loc><lastmod>2023-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spothitpy-a-study-for-ml-based-song-hit-prediction-using-spotify-2301.07978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spothitpy-a-study-for-ml-based-song-hit-prediction-using-spotify-2301.07978"/></url>
<url><loc>https://scifaro.com/en/abs/adjoint-based-identification-of-sound-sources-for-sound-reinforcement-and-source-localization-2301.08620</loc><lastmod>2023-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adjoint-based-identification-of-sound-sources-for-sound-reinforcement-and-source-localization-2301.08620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adjoint-based-identification-of-sound-sources-for-sound-reinforcement-and-source-localization-2301.08620"/></url>
<url><loc>https://scifaro.com/en/abs/cellular-network-speech-enhancement-removing-background-and-transmission-noise-2301.09027</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cellular-network-speech-enhancement-removing-background-and-transmission-noise-2301.09027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cellular-network-speech-enhancement-removing-background-and-transmission-noise-2301.09027"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-on-heart-sound-analysis-in-the-deep-learning-era-2301.09362</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-heart-sound-analysis-in-the-deep-learning-era-2301.09362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-heart-sound-analysis-in-the-deep-learning-era-2301.09362"/></url>
<url><loc>https://scifaro.com/en/abs/deep-attention-based-alignment-network-for-melody-generation-from-incomplete-lyrics-2301.10015</loc><lastmod>2023-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-attention-based-alignment-network-for-melody-generation-from-incomplete-lyrics-2301.10015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-attention-based-alignment-network-for-melody-generation-from-incomplete-lyrics-2301.10015"/></url>
<url><loc>https://scifaro.com/en/abs/mesostructures-beyond-spectrogram-loss-in-differentiable-time-frequency-analysis-2301.10183</loc><lastmod>2023-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mesostructures-beyond-spectrogram-loss-in-differentiable-time-frequency-analysis-2301.10183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mesostructures-beyond-spectrogram-loss-in-differentiable-time-frequency-analysis-2301.10183"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-multiaccented-multispeaker-tts-with-radtts-2301.10335</loc><lastmod>2023-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-multiaccented-multispeaker-tts-with-radtts-2301.10335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-multiaccented-multispeaker-tts-with-radtts-2301.10335"/></url>
<url><loc>https://scifaro.com/en/abs/hear4health-a-blueprint-for-making-computer-audition-a-staple-of-modern-healthcare-2301.10477</loc><lastmod>2023-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hear4health-a-blueprint-for-making-computer-audition-a-staple-of-modern-healthcare-2301.10477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hear4health-a-blueprint-for-making-computer-audition-a-staple-of-modern-healthcare-2301.10477"/></url>
<url><loc>https://scifaro.com/en/abs/on-batching-variable-size-inputs-for-training-end-to-end-speech-enhancement-systems-2301.10587</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-batching-variable-size-inputs-for-training-end-to-end-speech-enhancement-systems-2301.10587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-batching-variable-size-inputs-for-training-end-to-end-speech-enhancement-systems-2301.10587"/></url>
<url><loc>https://scifaro.com/en/abs/musiclm-generating-music-from-text-2301.11325</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musiclm-generating-music-from-text-2301.11325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musiclm-generating-music-from-text-2301.11325"/></url>
<url><loc>https://scifaro.com/en/abs/automated-arrangements-of-multi-part-music-for-sets-of-monophonic-instruments-2301.12084</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-arrangements-of-multi-part-music-for-sets-of-monophonic-instruments-2301.12084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-arrangements-of-multi-part-music-for-sets-of-monophonic-instruments-2301.12084"/></url>
<url><loc>https://scifaro.com/en/abs/who-is-snoring-snore-based-user-recognition-2301.12209</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-is-snoring-snore-based-user-recognition-2301.12209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-is-snoring-snore-based-user-recognition-2301.12209"/></url>
<url><loc>https://scifaro.com/en/abs/achieving-timestamp-prediction-while-recognizing-with-non-autoregressive-end-to-end-asr-model-2301.12343</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/achieving-timestamp-prediction-while-recognizing-with-non-autoregressive-end-to-end-asr-model-2301.12343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/achieving-timestamp-prediction-while-recognizing-with-non-autoregressive-end-to-end-asr-model-2301.12343"/></url>
<url><loc>https://scifaro.com/en/abs/artistic-curve-steganography-carried-by-musical-audio-2301.12354</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artistic-curve-steganography-carried-by-musical-audio-2301.12354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artistic-curve-steganography-carried-by-musical-audio-2301.12354"/></url>
<url><loc>https://scifaro.com/en/abs/audioldm-text-to-audio-generation-with-latent-diffusion-models-2301.12503</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioldm-text-to-audio-generation-with-latent-diffusion-models-2301.12503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioldm-text-to-audio-generation-with-latent-diffusion-models-2301.12503"/></url>
<url><loc>https://scifaro.com/en/abs/composer-s-assistant-an-interactive-transformer-for-multi-track-midi-infilling-2301.12525</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composer-s-assistant-an-interactive-transformer-for-multi-track-midi-infilling-2301.12525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composer-s-assistant-an-interactive-transformer-for-multi-track-midi-infilling-2301.12525"/></url>
<url><loc>https://scifaro.com/en/abs/make-an-audio-text-to-audio-generation-with-prompt-enhanced-diffusion-models-2301.12661</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/make-an-audio-text-to-audio-generation-with-prompt-enhanced-diffusion-models-2301.12661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/make-an-audio-text-to-audio-generation-with-prompt-enhanced-diffusion-models-2301.12661"/></url>
<url><loc>https://scifaro.com/en/abs/singsong-generating-musical-accompaniments-from-singing-2301.12662</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singsong-generating-musical-accompaniments-from-singing-2301.12662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singsong-generating-musical-accompaniments-from-singing-2301.12662"/></url>
<url><loc>https://scifaro.com/en/abs/archisound-audio-generation-with-diffusion-2301.13267</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/archisound-audio-generation-with-diffusion-2301.13267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/archisound-audio-generation-with-diffusion-2301.13267"/></url>
<url><loc>https://scifaro.com/en/abs/automated-time-frequency-domain-audio-crossfades-using-graph-cuts-2301.13380</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-time-frequency-domain-audio-crossfades-using-graph-cuts-2301.13380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-time-frequency-domain-audio-crossfades-using-graph-cuts-2301.13380"/></url>
<url><loc>https://scifaro.com/en/abs/an-comparative-analysis-of-different-pitch-and-metrical-grid-encoding-methods-in-the-task-of-sequential-music-generation-2301.13383</loc><lastmod>2023-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-comparative-analysis-of-different-pitch-and-metrical-grid-encoding-methods-in-the-task-of-sequential-music-generation-2301.13383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-comparative-analysis-of-different-pitch-and-metrical-grid-encoding-methods-in-the-task-of-sequential-music-generation-2301.13383"/></url>
<url><loc>https://scifaro.com/en/abs/instructtts-modelling-expressive-tts-in-discrete-latent-space-with-natural-language-style-prompt-2301.13662</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instructtts-modelling-expressive-tts-in-discrete-latent-space-with-natural-language-style-prompt-2301.13662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instructtts-modelling-expressive-tts-in-discrete-latent-space-with-natural-language-style-prompt-2301.13662"/></url>
<url><loc>https://scifaro.com/en/abs/jointist-simultaneous-improvement-of-multi-instrument-transcription-and-music-source-separation-via-joint-training-2302.00286</loc><lastmod>2023-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointist-simultaneous-improvement-of-multi-instrument-transcription-and-music-source-separation-via-joint-training-2302.00286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointist-simultaneous-improvement-of-multi-instrument-transcription-and-music-source-separation-via-joint-training-2302.00286"/></url>
<url><loc>https://scifaro.com/en/abs/epic-sounds-a-large-scale-dataset-of-actions-that-sound-2302.00646</loc><lastmod>2025-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/epic-sounds-a-large-scale-dataset-of-actions-that-sound-2302.00646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/epic-sounds-a-large-scale-dataset-of-actions-that-sound-2302.00646"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-for-virtual-meetings-on-cellular-networks-2302.00868</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-for-virtual-meetings-on-cellular-networks-2302.00868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-for-virtual-meetings-on-cellular-networks-2302.00868"/></url>
<url><loc>https://scifaro.com/en/abs/goniometers-are-a-powerful-acoustic-feature-for-music-information-retrieval-tasks-2302.01090</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/goniometers-are-a-powerful-acoustic-feature-for-music-information-retrieval-tasks-2302.01090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/goniometers-are-a-powerful-acoustic-feature-for-music-information-retrieval-tasks-2302.01090"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-diffusion-models-for-simultaneous-music-generation-and-separation-2302.02257</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-diffusion-models-for-simultaneous-music-generation-and-separation-2302.02257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-diffusion-models-for-simultaneous-music-generation-and-separation-2302.02257"/></url>
<url><loc>https://scifaro.com/en/abs/audio-representation-learning-by-distilling-video-as-privileged-information-2302.02845</loc><lastmod>2023-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-representation-learning-by-distilling-video-as-privileged-information-2302.02845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-representation-learning-by-distilling-video-as-privileged-information-2302.02845"/></url>
<url><loc>https://scifaro.com/en/abs/improved-vehicle-sub-type-classification-for-acoustic-traffic-monitoring-2302.02945</loc><lastmod>2023-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-vehicle-sub-type-classification-for-acoustic-traffic-monitoring-2302.02945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-vehicle-sub-type-classification-for-acoustic-traffic-monitoring-2302.02945"/></url>
<url><loc>https://scifaro.com/en/abs/speak-read-and-prompt-high-fidelity-text-to-speech-with-minimal-supervision-2302.03540</loc><lastmod>2023-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-read-and-prompt-high-fidelity-text-to-speech-with-minimal-supervision-2302.03540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-read-and-prompt-high-fidelity-text-to-speech-with-minimal-supervision-2302.03540"/></url>
<url><loc>https://scifaro.com/en/abs/noise2music-text-conditioned-music-generation-with-diffusion-models-2302.03917</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise2music-text-conditioned-music-generation-with-diffusion-models-2302.03917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise2music-text-conditioned-music-generation-with-diffusion-models-2302.03917"/></url>
<url><loc>https://scifaro.com/en/abs/ernie-music-text-to-waveform-music-generation-with-diffusion-models-2302.04456</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ernie-music-text-to-waveform-music-generation-with-diffusion-models-2302.04456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ernie-music-text-to-waveform-music-generation-with-diffusion-models-2302.04456"/></url>
<url><loc>https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-speech-dereverberation-using-kalman-filters-2302.04469</loc><lastmod>2023-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-speech-dereverberation-using-kalman-filters-2302.04469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-speech-dereverberation-using-kalman-filters-2302.04469"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-total-variation-regularization-in-the-design-of-an-intelligent-query-by-humming-system-2302.04577</loc><lastmod>2023-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-total-variation-regularization-in-the-design-of-an-intelligent-query-by-humming-system-2302.04577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-total-variation-regularization-in-the-design-of-an-intelligent-query-by-humming-system-2302.04577"/></url>
<url><loc>https://scifaro.com/en/abs/gtr-ctrl-instrument-and-genre-conditioning-for-guitar-focused-music-generation-with-transformers-2302.05393</loc><lastmod>2023-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gtr-ctrl-instrument-and-genre-conditioning-for-guitar-focused-music-generation-with-transformers-2302.05393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gtr-ctrl-instrument-and-genre-conditioning-for-guitar-focused-music-generation-with-transformers-2302.05393"/></url>
<url><loc>https://scifaro.com/en/abs/attention-does-not-guarantee-best-performance-in-speech-enhancement-2302.05690</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-does-not-guarantee-best-performance-in-speech-enhancement-2302.05690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-does-not-guarantee-best-performance-in-speech-enhancement-2302.05690"/></url>
<url><loc>https://scifaro.com/en/abs/local-spectral-attention-for-full-band-speech-enhancement-2302.05693</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-spectral-attention-for-full-band-speech-enhancement-2302.05693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-spectral-attention-for-full-band-speech-enhancement-2302.05693"/></url>
<url><loc>https://scifaro.com/en/abs/parameterizable-acoustical-modeling-and-auralization-of-cultural-heritage-sites-based-on-photogrammetry-2302.05725</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameterizable-acoustical-modeling-and-auralization-of-cultural-heritage-sites-based-on-photogrammetry-2302.05725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameterizable-acoustical-modeling-and-auralization-of-cultural-heritage-sites-based-on-photogrammetry-2302.05725"/></url>
<url><loc>https://scifaro.com/en/abs/semanticac-semantics-assisted-framework-for-audio-classification-2302.05940</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semanticac-semantics-assisted-framework-for-audio-classification-2302.05940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semanticac-semantics-assisted-framework-for-audio-classification-2302.05940"/></url>
<url><loc>https://scifaro.com/en/abs/detection-and-classification-of-vocal-productions-in-large-scale-audio-recordings-2302.07640</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-and-classification-of-vocal-productions-in-large-scale-audio-recordings-2302.07640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-and-classification-of-vocal-productions-in-large-scale-audio-recordings-2302.07640"/></url>
<url><loc>https://scifaro.com/en/abs/paaploss-a-phonetic-aligned-acoustic-parameter-loss-for-speech-enhancement-2302.08095</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paaploss-a-phonetic-aligned-acoustic-parameter-loss-for-speech-enhancement-2302.08095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paaploss-a-phonetic-aligned-acoustic-parameter-loss-for-speech-enhancement-2302.08095"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-audio-quality-preference-prediction-2302.08130</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-audio-quality-preference-prediction-2302.08130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-audio-quality-preference-prediction-2302.08130"/></url>
<url><loc>https://scifaro.com/en/abs/an-attention-based-approach-to-hierarchical-multi-label-music-instrument-classification-2302.08136</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attention-based-approach-to-hierarchical-multi-label-music-instrument-classification-2302.08136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attention-based-approach-to-hierarchical-multi-label-music-instrument-classification-2302.08136"/></url>
<url><loc>https://scifaro.com/en/abs/ace-vc-adaptive-and-controllable-voice-conversion-using-explicitly-disentangled-self-supervised-speech-representations-2302.08137</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ace-vc-adaptive-and-controllable-voice-conversion-using-explicitly-disentangled-self-supervised-speech-representations-2302.08137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ace-vc-adaptive-and-controllable-voice-conversion-using-explicitly-disentangled-self-supervised-speech-representations-2302.08137"/></url>
<url><loc>https://scifaro.com/en/abs/quickvc-any-to-many-voice-conversion-using-inverse-short-time-fourier-transform-for-faster-conversion-2302.08296</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quickvc-any-to-many-voice-conversion-using-inverse-short-time-fourier-transform-for-faster-conversion-2302.08296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quickvc-any-to-many-voice-conversion-using-inverse-short-time-fourier-transform-for-faster-conversion-2302.08296"/></url>
<url><loc>https://scifaro.com/en/abs/jazznet-a-dataset-of-fundamental-piano-patterns-for-music-audio-machine-learning-research-2302.08632</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jazznet-a-dataset-of-fundamental-piano-patterns-for-music-audio-machine-learning-research-2302.08632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jazznet-a-dataset-of-fundamental-piano-patterns-for-music-audio-machine-learning-research-2302.08632"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-smoothed-imbalance-data-improves-speech-emotion-recognition-2302.08650</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-smoothed-imbalance-data-improves-speech-emotion-recognition-2302.08650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-smoothed-imbalance-data-improves-speech-emotion-recognition-2302.08650"/></url>
<url><loc>https://scifaro.com/en/abs/lip-to-speech-synthesis-in-the-wild-with-multi-task-learning-2302.08841</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lip-to-speech-synthesis-in-the-wild-with-multi-task-learning-2302.08841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lip-to-speech-synthesis-in-the-wild-with-multi-task-learning-2302.08841"/></url>
<url><loc>https://scifaro.com/en/abs/deep-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2302.08921</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2302.08921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2302.08921"/></url>
<url><loc>https://scifaro.com/en/abs/exposing-ai-synthesized-human-voices-using-neural-vocoder-artifacts-2302.09198</loc><lastmod>2023-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exposing-ai-synthesized-human-voices-using-neural-vocoder-artifacts-2302.09198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exposing-ai-synthesized-human-voices-using-neural-vocoder-artifacts-2302.09198"/></url>
<url><loc>https://scifaro.com/en/abs/cost-effective-models-for-detecting-depression-from-speech-2302.09214</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cost-effective-models-for-detecting-depression-from-speech-2302.09214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cost-effective-models-for-detecting-depression-from-speech-2302.09214"/></url>
<url><loc>https://scifaro.com/en/abs/a-sidecar-separator-can-convert-a-single-talker-speech-recognition-system-to-a-multi-talker-one-2302.09908</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sidecar-separator-can-convert-a-single-talker-speech-recognition-system-to-a-multi-talker-one-2302.09908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sidecar-separator-can-convert-a-single-talker-speech-recognition-system-to-a-multi-talker-one-2302.09908"/></url>
<url><loc>https://scifaro.com/en/abs/towards-measuring-and-scoring-speaker-diarization-fairness-2302.09991</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-measuring-and-scoring-speaker-diarization-fairness-2302.09991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-measuring-and-scoring-speaker-diarization-fairness-2302.09991"/></url>
<url><loc>https://scifaro.com/en/abs/voxsrc-2022-the-fourth-voxceleb-speaker-recognition-challenge-2302.10248</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxsrc-2022-the-fourth-voxceleb-speaker-recognition-challenge-2302.10248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxsrc-2022-the-fourth-voxceleb-speaker-recognition-challenge-2302.10248"/></url>
<url><loc>https://scifaro.com/en/abs/pykanto-a-python-library-to-accelerate-research-on-wild-bird-song-2302.10340</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pykanto-a-python-library-to-accelerate-research-on-wild-bird-song-2302.10340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pykanto-a-python-library-to-accelerate-research-on-wild-bird-song-2302.10340"/></url>
<url><loc>https://scifaro.com/en/abs/nonparallel-emotional-voice-conversion-for-unseen-speaker-emotion-pairs-using-dual-domain-adversarial-network-virtual-domain-pairing-2302.10536</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonparallel-emotional-voice-conversion-for-unseen-speaker-emotion-pairs-using-dual-domain-adversarial-network-virtual-domain-pairing-2302.10536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonparallel-emotional-voice-conversion-for-unseen-speaker-emotion-pairs-using-dual-domain-adversarial-network-virtual-domain-pairing-2302.10536"/></url>
<url><loc>https://scifaro.com/en/abs/dasformer-deep-alternating-spectrogram-transformer-for-multi-single-channel-speech-separation-2302.10657</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dasformer-deep-alternating-spectrogram-transformer-for-multi-single-channel-speech-separation-2302.10657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dasformer-deep-alternating-spectrogram-transformer-for-multi-single-channel-speech-separation-2302.10657"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-spectrum-transformation-attacks-to-speaker-recognition-2302.10686</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-spectrum-transformation-attacks-to-speaker-recognition-2302.10686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-spectrum-transformation-attacks-to-speaker-recognition-2302.10686"/></url>
<url><loc>https://scifaro.com/en/abs/a-reinforcement-learning-framework-for-online-speaker-diarization-2302.10924</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-reinforcement-learning-framework-for-online-speaker-diarization-2302.10924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-reinforcement-learning-framework-for-online-speaker-diarization-2302.10924"/></url>
<url><loc>https://scifaro.com/en/abs/do-orcas-have-semantic-language-machine-learning-to-predict-orca-behaviors-using-partially-labeled-vocalization-data-2302.10983</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-orcas-have-semantic-language-machine-learning-to-predict-orca-behaviors-using-partially-labeled-vocalization-data-2302.10983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-orcas-have-semantic-language-machine-learning-to-predict-orca-behaviors-using-partially-labeled-vocalization-data-2302.10983"/></url>
<url><loc>https://scifaro.com/en/abs/improving-contextual-spelling-correction-by-external-acoustics-attention-and-semantic-aware-data-augmentation-2302.11192</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-contextual-spelling-correction-by-external-acoustics-attention-and-semantic-aware-data-augmentation-2302.11192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-contextual-spelling-correction-by-external-acoustics-attention-and-semantic-aware-data-augmentation-2302.11192"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-audio-visual-co-learning-for-text-independent-speaker-verification-2302.11254</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-audio-visual-co-learning-for-text-independent-speaker-verification-2302.11254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-audio-visual-co-learning-for-text-independent-speaker-verification-2302.11254"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-stuttering-detection-via-data-augmentation-class-balanced-loss-and-multi-contextual-deep-learning-2302.11343</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-stuttering-detection-via-data-augmentation-class-balanced-loss-and-multi-contextual-deep-learning-2302.11343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-stuttering-detection-via-data-augmentation-class-balanced-loss-and-multi-contextual-deep-learning-2302.11343"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-enhancement-via-event-based-query-2302.11558</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-enhancement-via-event-based-query-2302.11558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-enhancement-via-event-based-query-2302.11558"/></url>
<url><loc>https://scifaro.com/en/abs/mossformer-pushing-the-performance-limit-of-monaural-speech-separation-using-gated-single-head-transformer-with-convolution-augmented-joint-self-attentions-2302.11824</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mossformer-pushing-the-performance-limit-of-monaural-speech-separation-using-gated-single-head-transformer-with-convolution-augmented-joint-self-attentions-2302.11824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mossformer-pushing-the-performance-limit-of-monaural-speech-separation-using-gated-single-head-transformer-with-convolution-augmented-joint-self-attentions-2302.11824"/></url>
<url><loc>https://scifaro.com/en/abs/d2former-a-fully-complex-dual-path-dual-decoder-conformer-network-using-joint-complex-masking-and-complex-spectral-mapping-for-monaural-speech-enhancement-2302.11832</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/d2former-a-fully-complex-dual-path-dual-decoder-conformer-network-using-joint-complex-masking-and-complex-spectral-mapping-for-monaural-speech-enhancement-2302.11832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/d2former-a-fully-complex-dual-path-dual-decoder-conformer-network-using-joint-complex-masking-and-complex-spectral-mapping-for-monaural-speech-enhancement-2302.11832"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-noise-adaptation-using-data-simulation-2302.11981</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-noise-adaptation-using-data-simulation-2302.11981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-noise-adaptation-using-data-simulation-2302.11981"/></url>
<url><loc>https://scifaro.com/en/abs/metric-oriented-speech-enhancement-using-diffusion-probabilistic-model-2302.11989</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-oriented-speech-enhancement-using-diffusion-probabilistic-model-2302.11989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-oriented-speech-enhancement-using-diffusion-probabilistic-model-2302.11989"/></url>
<url><loc>https://scifaro.com/en/abs/data-leakage-in-cross-modal-retrieval-training-a-case-study-2302.12258</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-leakage-in-cross-modal-retrieval-training-a-case-study-2302.12258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-leakage-in-cross-modal-retrieval-training-a-case-study-2302.12258"/></url>
<url><loc>https://scifaro.com/en/abs/catch-you-and-i-can-revealing-source-voiceprint-against-voice-conversion-2302.12434</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catch-you-and-i-can-revealing-source-voiceprint-against-voice-conversion-2302.12434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catch-you-and-i-can-revealing-source-voiceprint-against-voice-conversion-2302.12434"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-hierarchical-clustering-using-graph-neural-networks-for-speaker-diarization-2302.12716</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-hierarchical-clustering-using-graph-neural-networks-for-speaker-diarization-2302.12716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-hierarchical-clustering-using-graph-neural-networks-for-speaker-diarization-2302.12716"/></url>
<url><loc>https://scifaro.com/en/abs/towards-multi-task-learning-of-speech-and-speaker-recognition-2302.12773</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-multi-task-learning-of-speech-and-speaker-recognition-2302.12773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-multi-task-learning-of-speech-and-speaker-recognition-2302.12773"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-in-realistic-scenario-using-multimodal-data-2302.13033</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-in-realistic-scenario-using-multimodal-data-2302.13033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-in-realistic-scenario-using-multimodal-data-2302.13033"/></url>
<url><loc>https://scifaro.com/en/abs/two-stream-joint-training-for-speaker-independent-acoustic-to-articulatory-inversion-2302.13273</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stream-joint-training-for-speaker-independent-acoustic-to-articulatory-inversion-2302.13273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stream-joint-training-for-speaker-independent-acoustic-to-articulatory-inversion-2302.13273"/></url>
<url><loc>https://scifaro.com/en/abs/mingling-or-misalignment-temporal-shift-for-speech-emotion-recognition-with-pre-trained-representations-2302.13277</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mingling-or-misalignment-temporal-shift-for-speech-emotion-recognition-with-pre-trained-representations-2302.13277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mingling-or-misalignment-temporal-shift-for-speech-emotion-recognition-with-pre-trained-representations-2302.13277"/></url>
<url><loc>https://scifaro.com/en/abs/contrast-plc-contrastive-learning-for-packet-loss-concealment-2302.13284</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrast-plc-contrastive-learning-for-packet-loss-concealment-2302.13284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrast-plc-contrastive-learning-for-packet-loss-concealment-2302.13284"/></url>
<url><loc>https://scifaro.com/en/abs/implementation-of-an-aeroacoustic-simulation-pipeline-using-opencfs-acoustics-and-opencfs-data-applied-to-human-phonation-2302.13290</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementation-of-an-aeroacoustic-simulation-pipeline-using-opencfs-acoustics-and-opencfs-data-applied-to-human-phonation-2302.13290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementation-of-an-aeroacoustic-simulation-pipeline-using-opencfs-acoustics-and-opencfs-data-applied-to-human-phonation-2302.13290"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modality-in-music-predicting-emotion-in-music-from-high-level-audio-features-and-lyrics-2302.13321</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modality-in-music-predicting-emotion-in-music-from-high-level-audio-features-and-lyrics-2302.13321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modality-in-music-predicting-emotion-in-music-from-high-level-audio-features-and-lyrics-2302.13321"/></url>
<url><loc>https://scifaro.com/en/abs/from-audio-to-symbolic-encoding-2302.13401</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-audio-to-symbolic-encoding-2302.13401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-audio-to-symbolic-encoding-2302.13401"/></url>
<url><loc>https://scifaro.com/en/abs/a-low-latency-attention-module-for-streaming-self-supervised-speech-representation-learning-2302.13451</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-low-latency-attention-module-for-streaming-self-supervised-speech-representation-learning-2302.13451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-low-latency-attention-module-for-streaming-self-supervised-speech-representation-learning-2302.13451"/></url>
<url><loc>https://scifaro.com/en/abs/3d-neural-beamforming-for-multi-channel-speech-separation-against-location-uncertainty-2302.13462</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-neural-beamforming-for-multi-channel-speech-separation-against-location-uncertainty-2302.13462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-neural-beamforming-for-multi-channel-speech-separation-against-location-uncertainty-2302.13462"/></url>
<url><loc>https://scifaro.com/en/abs/ve-kws-visual-modality-enhanced-end-to-end-keyword-spotting-2302.13523</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ve-kws-visual-modality-enhanced-end-to-end-keyword-spotting-2302.13523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ve-kws-visual-modality-enhanced-end-to-end-keyword-spotting-2302.13523"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-descriptor-based-control-for-deep-audio-synthesis-2302.13542</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-descriptor-based-control-for-deep-audio-synthesis-2302.13542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-descriptor-based-control-for-deep-audio-synthesis-2302.13542"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-analysis-of-latent-regressor-losses-for-singing-voice-conversion-2302.13678</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-analysis-of-latent-regressor-losses-for-singing-voice-conversion-2302.13678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-analysis-of-latent-regressor-losses-for-singing-voice-conversion-2302.13678"/></url>
<url><loc>https://scifaro.com/en/abs/dst-deformable-speech-transformer-for-emotion-recognition-2302.13729</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dst-deformable-speech-transformer-for-emotion-recognition-2302.13729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dst-deformable-speech-transformer-for-emotion-recognition-2302.13729"/></url>
<url><loc>https://scifaro.com/en/abs/phone-and-speaker-spatial-organization-in-self-supervised-speech-representations-2302.14055</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phone-and-speaker-spatial-organization-in-self-supervised-speech-representations-2302.14055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phone-and-speaker-spatial-organization-in-self-supervised-speech-representations-2302.14055"/></url>
<url><loc>https://scifaro.com/en/abs/explanations-for-automatic-speech-recognition-2302.14062</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explanations-for-automatic-speech-recognition-2302.14062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explanations-for-automatic-speech-recognition-2302.14062"/></url>
<url><loc>https://scifaro.com/en/abs/halluaudio-hallucinating-frequency-as-concepts-for-few-shot-audio-classification-2302.14204</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/halluaudio-hallucinating-frequency-as-concepts-for-few-shot-audio-classification-2302.14204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/halluaudio-hallucinating-frequency-as-concepts-for-few-shot-audio-classification-2302.14204"/></url>
<url><loc>https://scifaro.com/en/abs/adapter-incremental-continual-learning-of-efficient-audio-spectrogram-transformers-2302.14314</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapter-incremental-continual-learning-of-efficient-audio-spectrogram-transformers-2302.14314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapter-incremental-continual-learning-of-efficient-audio-spectrogram-transformers-2302.14314"/></url>
<url><loc>https://scifaro.com/en/abs/crossspeech-speaker-independent-acoustic-representation-for-cross-lingual-speech-synthesis-2302.14370</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossspeech-speaker-independent-acoustic-representation-for-cross-lingual-speech-synthesis-2302.14370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossspeech-speaker-independent-acoustic-representation-for-cross-lingual-speech-synthesis-2302.14370"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-self-supervised-pre-trained-asr-models-for-dysarthric-and-elderly-speech-recognition-2302.14564</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-self-supervised-pre-trained-asr-models-for-dysarthric-and-elderly-speech-recognition-2302.14564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-self-supervised-pre-trained-asr-models-for-dysarthric-and-elderly-speech-recognition-2302.14564"/></url>
<url><loc>https://scifaro.com/en/abs/dehubert-disentangling-noise-in-a-self-supervised-model-for-robust-speech-recognition-2302.14597</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dehubert-disentangling-noise-in-a-self-supervised-model-for-robust-speech-recognition-2302.14597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dehubert-disentangling-noise-in-a-self-supervised-model-for-robust-speech-recognition-2302.14597"/></url>
<url><loc>https://scifaro.com/en/abs/pcf-ecapa-tdnn-with-progressive-channel-fusion-for-speaker-verification-2303.00204</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pcf-ecapa-tdnn-with-progressive-channel-fusion-for-speaker-verification-2303.00204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pcf-ecapa-tdnn-with-progressive-channel-fusion-for-speaker-verification-2303.00204"/></url>
<url><loc>https://scifaro.com/en/abs/distance-based-weight-transfer-from-near-field-to-far-field-speaker-verification-2303.00264</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distance-based-weight-transfer-from-near-field-to-far-field-speaker-verification-2303.00264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distance-based-weight-transfer-from-near-field-to-far-field-speaker-verification-2303.00264"/></url>
<url><loc>https://scifaro.com/en/abs/cam-a-fast-and-efficient-network-for-speaker-verification-using-context-aware-masking-2303.00332</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cam-a-fast-and-efficient-network-for-speaker-verification-using-context-aware-masking-2303.00332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cam-a-fast-and-efficient-network-for-speaker-verification-using-context-aware-masking-2303.00332"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-audio-visual-synchronization-for-lip-to-speech-synthesis-2303.00502</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-audio-visual-synchronization-for-lip-to-speech-synthesis-2303.00502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-audio-visual-synchronization-for-lip-to-speech-synthesis-2303.00502"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-speech-data-augmentation-methods-using-s3prl-toolkit-2303.00510</loc><lastmod>2024-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-speech-data-augmentation-methods-using-s3prl-toolkit-2303.00510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-speech-data-augmentation-methods-using-s3prl-toolkit-2303.00510"/></url>
<url><loc>https://scifaro.com/en/abs/whisperx-time-accurate-speech-transcription-of-long-form-audio-2303.00747</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisperx-time-accurate-speech-transcription-of-long-form-audio-2303.00747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisperx-time-accurate-speech-transcription-of-long-form-audio-2303.00747"/></url>
<url><loc>https://scifaro.com/en/abs/distilling-multi-level-x-vector-knowledge-for-small-footprint-speaker-verification-2303.01125</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distilling-multi-level-x-vector-knowledge-for-small-footprint-speaker-verification-2303.01125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distilling-multi-level-x-vector-knowledge-for-small-footprint-speaker-verification-2303.01125"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-aware-anti-spoofing-2303.01126</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-aware-anti-spoofing-2303.01126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-aware-anti-spoofing-2303.01126"/></url>
<url><loc>https://scifaro.com/en/abs/learning-from-yourself-a-self-distillation-method-for-fake-speech-detection-2303.01211</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-from-yourself-a-self-distillation-method-for-fake-speech-detection-2303.01211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-from-yourself-a-self-distillation-method-for-fake-speech-detection-2303.01211"/></url>
<url><loc>https://scifaro.com/en/abs/defending-against-adversarial-audio-via-diffusion-model-2303.01507</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defending-against-adversarial-audio-via-diffusion-model-2303.01507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defending-against-adversarial-audio-via-diffusion-model-2303.01507"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-emotional-control-of-text-to-speech-learning-to-rank-inter-and-intra-class-emotion-intensities-2303.01508</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-emotional-control-of-text-to-speech-learning-to-rank-inter-and-intra-class-emotion-intensities-2303.01508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-emotional-control-of-text-to-speech-learning-to-rank-inter-and-intra-class-emotion-intensities-2303.01508"/></url>
<url><loc>https://scifaro.com/en/abs/wesper-zero-shot-and-realtime-whisper-to-normal-voice-conversion-for-whisper-based-speech-interactions-2303.01639</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wesper-zero-shot-and-realtime-whisper-to-normal-voice-conversion-for-whisper-based-speech-interactions-2303.01639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wesper-zero-shot-and-realtime-whisper-to-normal-voice-conversion-for-whisper-based-speech-interactions-2303.01639"/></url>
<url><loc>https://scifaro.com/en/abs/miipher-a-robust-speech-restoration-model-integrating-self-supervised-speech-and-text-representations-2303.01664</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/miipher-a-robust-speech-restoration-model-integrating-self-supervised-speech-and-text-representations-2303.01664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/miipher-a-robust-speech-restoration-model-integrating-self-supervised-speech-and-text-representations-2303.01664"/></url>
<url><loc>https://scifaro.com/en/abs/loopergp-a-loopable-sequence-model-for-live-coding-performance-using-guitarpro-tablature-2303.01665</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loopergp-a-loopable-sequence-model-for-live-coding-performance-using-guitarpro-tablature-2303.01665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loopergp-a-loopable-sequence-model-for-live-coding-performance-using-guitarpro-tablature-2303.01665"/></url>
<url><loc>https://scifaro.com/en/abs/dwformer-dynamic-window-transformer-for-speech-emotion-recognition-2303.01694</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dwformer-dynamic-window-transformer-for-speech-emotion-recognition-2303.01694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dwformer-dynamic-window-transformer-for-speech-emotion-recognition-2303.01694"/></url>
<url><loc>https://scifaro.com/en/abs/unified-keyword-spotting-and-audio-tagging-on-mobile-devices-with-transformers-2303.01812</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-keyword-spotting-and-audio-tagging-on-mobile-devices-with-transformers-2303.01812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-keyword-spotting-and-audio-tagging-on-mobile-devices-with-transformers-2303.01812"/></url>
<url><loc>https://scifaro.com/en/abs/spectrogram-inversion-for-audio-source-separation-via-consistency-mixing-and-magnitude-constraints-2303.01864</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrogram-inversion-for-audio-source-separation-via-consistency-mixing-and-magnitude-constraints-2303.01864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrogram-inversion-for-audio-source-separation-via-consistency-mixing-and-magnitude-constraints-2303.01864"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-and-visualising-intended-emotion-in-an-expressive-piano-performance-2303.01875</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-and-visualising-intended-emotion-in-an-expressive-piano-performance-2303.01875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-and-visualising-intended-emotion-in-an-expressive-piano-performance-2303.01875"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-audio-embedding-extractors-2303.01879</loc><lastmod>2023-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-audio-embedding-extractors-2303.01879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-audio-embedding-extractors-2303.01879"/></url>
<url><loc>https://scifaro.com/en/abs/automatch-a-large-scale-audio-beat-matching-benchmark-for-boosting-deep-learning-assistant-video-editing-2303.01884</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatch-a-large-scale-audio-beat-matching-benchmark-for-boosting-deep-learning-assistant-video-editing-2303.01884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatch-a-large-scale-audio-beat-matching-benchmark-for-boosting-deep-learning-assistant-video-editing-2303.01884"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-post-challenge-audio-visual-wake-word-spotting-system-for-the-2021-misp-challenge-deep-analysis-2303.02348</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-post-challenge-audio-visual-wake-word-spotting-system-for-the-2021-misp-challenge-deep-analysis-2303.02348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-post-challenge-audio-visual-wake-word-spotting-system-for-the-2021-misp-challenge-deep-analysis-2303.02348"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-framework-for-learning-procedural-audio-models-of-environmental-sounds-2303.02396</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-framework-for-learning-procedural-audio-models-of-environmental-sounds-2303.02396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-framework-for-learning-procedural-audio-models-of-environmental-sounds-2303.02396"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-y-net-architecture-for-singing-voice-separation-2303.02599</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-y-net-architecture-for-singing-voice-separation-2303.02599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-y-net-architecture-for-singing-voice-separation-2303.02599"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-graph-learning-for-acoustic-event-classification-2303.02665</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-graph-learning-for-acoustic-event-classification-2303.02665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-graph-learning-for-acoustic-event-classification-2303.02665"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-network-for-robust-speaker-recognition-2303.02673</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-network-for-robust-speaker-recognition-2303.02673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-network-for-robust-speaker-recognition-2303.02673"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-strategies-for-on-device-low-complexity-source-separation-with-conv-tasnet-2303.03005</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-strategies-for-on-device-low-complexity-source-separation-with-conv-tasnet-2303.03005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-strategies-for-on-device-low-complexity-source-separation-with-conv-tasnet-2303.03005"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-synthetic-training-data-for-the-supervised-classification-of-rat-ultrasonic-vocalizations-2303.03183</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-synthetic-training-data-for-the-supervised-classification-of-rat-ultrasonic-vocalizations-2303.03183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-synthetic-training-data-for-the-supervised-classification-of-rat-ultrasonic-vocalizations-2303.03183"/></url>
<url><loc>https://scifaro.com/en/abs/approach-to-learning-generalized-audio-representation-through-batch-embedding-covariance-regularization-and-constant-q-transforms-2303.03591</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approach-to-learning-generalized-audio-representation-through-batch-embedding-covariance-regularization-and-constant-q-transforms-2303.03591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approach-to-learning-generalized-audio-representation-through-batch-embedding-covariance-regularization-and-constant-q-transforms-2303.03591"/></url>
<url><loc>https://scifaro.com/en/abs/face-fast-accurate-and-context-aware-audio-annotation-and-classification-2303.03666</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/face-fast-accurate-and-context-aware-audio-annotation-and-classification-2303.03666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/face-fast-accurate-and-context-aware-audio-annotation-and-classification-2303.03666"/></url>
<url><loc>https://scifaro.com/en/abs/improving-self-supervised-learning-for-audio-representations-by-feature-diversity-and-decorrelation-2303.03717</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-self-supervised-learning-for-audio-representations-by-feature-diversity-and-decorrelation-2303.03717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-self-supervised-learning-for-audio-representations-by-feature-diversity-and-decorrelation-2303.03717"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-stage-triple-path-method-for-speech-separation-in-noisy-and-reverberant-environments-2303.03732</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-stage-triple-path-method-for-speech-separation-in-noisy-and-reverberant-environments-2303.03732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-stage-triple-path-method-for-speech-separation-in-noisy-and-reverberant-environments-2303.03732"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dimensional-and-multi-scale-modeling-for-speech-separation-optimized-by-discriminative-learning-2303.03737</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dimensional-and-multi-scale-modeling-for-speech-separation-optimized-by-discriminative-learning-2303.03737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dimensional-and-multi-scale-modeling-for-speech-separation-optimized-by-discriminative-learning-2303.03737"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-pre-trained-audioldm-for-sound-generation-a-benchmark-study-2303.03857</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-pre-trained-audioldm-for-sound-generation-a-benchmark-study-2303.03857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-pre-trained-audioldm-for-sound-generation-a-benchmark-study-2303.03857"/></url>
<url><loc>https://scifaro.com/en/abs/danceanyway-synthesizing-beat-guided-3d-dances-with-randomized-temporal-contrastive-learning-2303.03870</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/danceanyway-synthesizing-beat-guided-3d-dances-with-randomized-temporal-contrastive-learning-2303.03870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/danceanyway-synthesizing-beat-guided-3d-dances-with-randomized-temporal-contrastive-learning-2303.03870"/></url>
<url><loc>https://scifaro.com/en/abs/an-inception-residual-based-architecture-with-multi-objective-loss-for-detecting-respiratory-anomalies-2303.04104</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-inception-residual-based-architecture-with-multi-objective-loss-for-detecting-respiratory-anomalies-2303.04104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-inception-residual-based-architecture-with-multi-objective-loss-for-detecting-respiratory-anomalies-2303.04104"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speech-representation-learning-for-keyword-spotting-with-light-weight-transformers-2303.04255</loc><lastmod>2023-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speech-representation-learning-for-keyword-spotting-with-light-weight-transformers-2303.04255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speech-representation-learning-for-keyword-spotting-with-light-weight-transformers-2303.04255"/></url>
<url><loc>https://scifaro.com/en/abs/onsets-and-velocities-affordable-real-time-piano-transcription-using-convolutional-neural-networks-2303.04485</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/onsets-and-velocities-affordable-real-time-piano-transcription-using-convolutional-neural-networks-2303.04485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/onsets-and-velocities-affordable-real-time-piano-transcription-using-convolutional-neural-networks-2303.04485"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-efficient-tuned-learning-audio-representation-method-from-brivl-2303.04585</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-efficient-tuned-learning-audio-representation-method-from-brivl-2303.04585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-efficient-tuned-learning-audio-representation-method-from-brivl-2303.04585"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-network-with-decoupled-knowledge-distillation-for-speech-emotion-recognition-2303.05134</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-network-with-decoupled-knowledge-distillation-for-speech-emotion-recognition-2303.05134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-network-with-decoupled-knowledge-distillation-for-speech-emotion-recognition-2303.05134"/></url>
<url><loc>https://scifaro.com/en/abs/improving-few-shot-learning-for-talking-face-system-with-tts-data-augmentation-2303.05322</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-few-shot-learning-for-talking-face-system-with-tts-data-augmentation-2303.05322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-few-shot-learning-for-talking-face-system-with-tts-data-augmentation-2303.05322"/></url>
<url><loc>https://scifaro.com/en/abs/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning-2303.05338</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning-2303.05338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmcosine-multi-modal-cosine-loss-towards-balanced-audio-visual-fine-grained-learning-2303.05338"/></url>
<url><loc>https://scifaro.com/en/abs/told-a-novel-two-stage-overlap-aware-framework-for-speaker-diarization-2303.05397</loc><lastmod>2023-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/told-a-novel-two-stage-overlap-aware-framework-for-speaker-diarization-2303.05397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/told-a-novel-two-stage-overlap-aware-framework-for-speaker-diarization-2303.05397"/></url>
<url><loc>https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-causal-intervention-2303.05678</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-causal-intervention-2303.05678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-causal-intervention-2303.05678"/></url>
<url><loc>https://scifaro.com/en/abs/improving-text-audio-retrieval-by-text-aware-attention-pooling-and-prior-matrix-revised-loss-2303.05681</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-text-audio-retrieval-by-text-aware-attention-pooling-and-prior-matrix-revised-loss-2303.05681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-text-audio-retrieval-by-text-aware-attention-pooling-and-prior-matrix-revised-loss-2303.05681"/></url>
<url><loc>https://scifaro.com/en/abs/mixpgd-hybrid-adversarial-training-for-speech-recognition-systems-2303.05758</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixpgd-hybrid-adversarial-training-for-speech-recognition-systems-2303.05758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixpgd-hybrid-adversarial-training-for-speech-recognition-systems-2303.05758"/></url>
<url><loc>https://scifaro.com/en/abs/tayloraecnet-a-taylor-style-neural-network-for-full-band-echo-cancellation-2303.06379</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tayloraecnet-a-taylor-style-neural-network-for-full-band-echo-cancellation-2303.06379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tayloraecnet-a-taylor-style-neural-network-for-full-band-echo-cancellation-2303.06379"/></url>
<url><loc>https://scifaro.com/en/abs/analysing-the-masked-predictive-coding-training-criterion-for-pre-training-a-speech-representation-model-2303.06982</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysing-the-masked-predictive-coding-training-criterion-for-pre-training-a-speech-representation-model-2303.06982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysing-the-masked-predictive-coding-training-criterion-for-pre-training-a-speech-representation-model-2303.06982"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-speaker-extraction-algorithm-under-adverse-acoustic-conditions-using-a-single-microphone-2303.07072</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-speaker-extraction-algorithm-under-adverse-acoustic-conditions-using-a-single-microphone-2303.07072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-speaker-extraction-algorithm-under-adverse-acoustic-conditions-using-a-single-microphone-2303.07072"/></url>
<url><loc>https://scifaro.com/en/abs/vani-very-lightweight-accent-controllable-tts-for-native-and-non-native-speakers-with-identity-preservation-2303.07578</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vani-very-lightweight-accent-controllable-tts-for-native-and-non-native-speakers-with-identity-preservation-2303.07578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vani-very-lightweight-accent-controllable-tts-for-native-and-non-native-speakers-with-identity-preservation-2303.07578"/></url>
<url><loc>https://scifaro.com/en/abs/cat-causal-audio-transformer-for-audio-classification-2303.07626</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cat-causal-audio-transformer-for-audio-classification-2303.07626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cat-causal-audio-transformer-for-audio-classification-2303.07626"/></url>
<url><loc>https://scifaro.com/en/abs/feature-rich-audio-model-inversion-for-data-free-knowledge-distillation-towards-general-sound-classification-2303.07643</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-rich-audio-model-inversion-for-data-free-knowledge-distillation-towards-general-sound-classification-2303.07643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-rich-audio-model-inversion-for-data-free-knowledge-distillation-towards-general-sound-classification-2303.07643"/></url>
<url><loc>https://scifaro.com/en/abs/improving-music-genre-classification-from-multi-modal-properties-of-music-and-genre-correlations-perspective-2303.07667</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-music-genre-classification-from-multi-modal-properties-of-music-and-genre-correlations-perspective-2303.07667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-music-genre-classification-from-multi-modal-properties-of-music-and-genre-correlations-perspective-2303.07667"/></url>
<url><loc>https://scifaro.com/en/abs/qi-tts-questioning-intonation-control-for-emotional-speech-synthesis-2303.07682</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qi-tts-questioning-intonation-control-for-emotional-speech-synthesis-2303.07682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qi-tts-questioning-intonation-control-for-emotional-speech-synthesis-2303.07682"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-alignment-mask-ctc-improved-mask-ctc-with-aligned-cross-entropy-2303.07687</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-alignment-mask-ctc-improved-mask-ctc-with-aligned-cross-entropy-2303.07687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-alignment-mask-ctc-improved-mask-ctc-with-aligned-cross-entropy-2303.07687"/></url>
<url><loc>https://scifaro.com/en/abs/improving-prosody-for-cross-speaker-style-transfer-by-semi-supervised-style-extractor-and-hierarchical-modeling-in-speech-synthesis-2303.07711</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-prosody-for-cross-speaker-style-transfer-by-semi-supervised-style-extractor-and-hierarchical-modeling-in-speech-synthesis-2303.07711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-prosody-for-cross-speaker-style-transfer-by-semi-supervised-style-extractor-and-hierarchical-modeling-in-speech-synthesis-2303.07711"/></url>
<url><loc>https://scifaro.com/en/abs/diffuseroll-multi-track-multi-category-music-generation-based-on-diffusion-model-2303.07794</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffuseroll-multi-track-multi-category-music-generation-based-on-diffusion-model-2303.07794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffuseroll-multi-track-multi-category-music-generation-based-on-diffusion-model-2303.07794"/></url>
<url><loc>https://scifaro.com/en/abs/blat-bootstrapping-language-audio-pre-training-based-on-audioset-tag-guided-synthetic-data-2303.07902</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blat-bootstrapping-language-audio-pre-training-based-on-audioset-tag-guided-synthetic-data-2303.07902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blat-bootstrapping-language-audio-pre-training-based-on-audioset-tag-guided-synthetic-data-2303.07902"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-bias-and-fairness-in-deep-speaker-recognition-2303.08026</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-bias-and-fairness-in-deep-speaker-recognition-2303.08026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-bias-and-fairness-in-deep-speaker-recognition-2303.08026"/></url>
<url><loc>https://scifaro.com/en/abs/facilitating-deep-acoustic-phenotyping-a-basic-coding-scheme-of-infant-vocalisations-preluding-computational-analysis-machine-learning-and-clinical-reasoning-2303.08239</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/facilitating-deep-acoustic-phenotyping-a-basic-coding-scheme-of-infant-vocalisations-preluding-computational-analysis-machine-learning-and-clinical-reasoning-2303.08239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/facilitating-deep-acoustic-phenotyping-a-basic-coding-scheme-of-infant-vocalisations-preluding-computational-analysis-machine-learning-and-clinical-reasoning-2303.08239"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-emotion-transfer-by-manipulating-speech-style-latents-2303.08329</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-by-manipulating-speech-style-latents-2303.08329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-by-manipulating-speech-style-latents-2303.08329"/></url>
<url><loc>https://scifaro.com/en/abs/autonomous-soundscape-augmentation-with-multimodal-fusion-of-visual-and-participant-linked-inputs-2303.08342</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autonomous-soundscape-augmentation-with-multimodal-fusion-of-visual-and-participant-linked-inputs-2303.08342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autonomous-soundscape-augmentation-with-multimodal-fusion-of-visual-and-participant-linked-inputs-2303.08342"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-based-diagnosis-and-analysis-of-lung-sound-aberrations-2303.08362</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-based-diagnosis-and-analysis-of-lung-sound-aberrations-2303.08362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-based-diagnosis-and-analysis-of-lung-sound-aberrations-2303.08362"/></url>
<url><loc>https://scifaro.com/en/abs/generating-symbolic-music-using-diffusion-models-2303.08385</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-symbolic-music-using-diffusion-models-2303.08385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-symbolic-music-using-diffusion-models-2303.08385"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-unsupervised-audio-representation-learning-via-adversarial-sample-generation-2303.08561</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-unsupervised-audio-representation-learning-via-adversarial-sample-generation-2303.08561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-unsupervised-audio-representation-learning-via-adversarial-sample-generation-2303.08561"/></url>
<url><loc>https://scifaro.com/en/abs/phoneix-acoustic-feature-processing-strategy-for-enhanced-singing-pronunciation-with-phoneme-distribution-predictor-2303.08607</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneix-acoustic-feature-processing-strategy-for-enhanced-singing-pronunciation-with-phoneme-distribution-predictor-2303.08607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneix-acoustic-feature-processing-strategy-for-enhanced-singing-pronunciation-with-phoneme-distribution-predictor-2303.08607"/></url>
<url><loc>https://scifaro.com/en/abs/blind-estimation-of-audio-processing-graph-2303.08610</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-estimation-of-audio-processing-graph-2303.08610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-estimation-of-audio-processing-graph-2303.08610"/></url>
<url><loc>https://scifaro.com/en/abs/improving-perceptual-quality-intelligibility-and-acoustics-on-voip-platforms-2303.09048</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-perceptual-quality-intelligibility-and-acoustics-on-voip-platforms-2303.09048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-perceptual-quality-intelligibility-and-acoustics-on-voip-platforms-2303.09048"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-sound-event-classification-using-a-sound-attribute-vector-with-global-and-local-feature-learning-2303.10316</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-sound-event-classification-using-a-sound-attribute-vector-with-global-and-local-feature-learning-2303.10316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-sound-event-classification-using-a-sound-attribute-vector-with-global-and-local-feature-learning-2303.10316"/></url>
<url><loc>https://scifaro.com/en/abs/weight-sharing-supernet-for-searching-specialized-acoustic-event-classification-networks-across-device-constraints-2303.10351</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weight-sharing-supernet-for-searching-specialized-acoustic-event-classification-networks-across-device-constraints-2303.10351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weight-sharing-supernet-for-searching-specialized-acoustic-event-classification-networks-across-device-constraints-2303.10351"/></url>
<url><loc>https://scifaro.com/en/abs/earcough-enabling-continuous-subject-cough-event-detection-on-hearables-2303.10445</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/earcough-enabling-continuous-subject-cough-event-detection-on-hearables-2303.10445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/earcough-enabling-continuous-subject-cough-event-detection-on-hearables-2303.10445"/></url>
<url><loc>https://scifaro.com/en/abs/content-adaptive-front-end-for-audio-classification-2303.10446</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-adaptive-front-end-for-audio-classification-2303.10446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-adaptive-front-end-for-audio-classification-2303.10446"/></url>
<url><loc>https://scifaro.com/en/abs/textless-speech-to-music-retrieval-using-emotion-similarity-2303.10539</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/textless-speech-to-music-retrieval-using-emotion-similarity-2303.10539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/textless-speech-to-music-retrieval-using-emotion-similarity-2303.10539"/></url>
<url><loc>https://scifaro.com/en/abs/audio-text-models-do-not-yet-leverage-natural-language-2303.10667</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-text-models-do-not-yet-leverage-natural-language-2303.10667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-text-models-do-not-yet-leverage-natural-language-2303.10667"/></url>
<url><loc>https://scifaro.com/en/abs/multiscale-audio-spectrogram-transformer-for-efficient-audio-classification-2303.10757</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiscale-audio-spectrogram-transformer-for-efficient-audio-classification-2303.10757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiscale-audio-spectrogram-transformer-for-efficient-audio-classification-2303.10757"/></url>
<url><loc>https://scifaro.com/en/abs/relate-auditory-speech-to-eeg-by-shallow-deep-attention-based-network-2303.10897</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relate-auditory-speech-to-eeg-by-shallow-deep-attention-based-network-2303.10897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relate-auditory-speech-to-eeg-by-shallow-deep-attention-based-network-2303.10897"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-representation-learning-for-small-footprint-keyword-spotting-2303.10912</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-representation-learning-for-small-footprint-keyword-spotting-2303.10912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-representation-learning-for-small-footprint-keyword-spotting-2303.10912"/></url>
<url><loc>https://scifaro.com/en/abs/ds-tdnn-dual-stream-time-delay-neural-network-with-global-aware-filter-for-speaker-verification-2303.11020</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ds-tdnn-dual-stream-time-delay-neural-network-with-global-aware-filter-for-speaker-verification-2303.11020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ds-tdnn-dual-stream-time-delay-neural-network-with-global-aware-filter-for-speaker-verification-2303.11020"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2023-deep-noise-suppression-challenge-2303.11510</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2023-deep-noise-suppression-challenge-2303.11510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2023-deep-noise-suppression-challenge-2303.11510"/></url>
<url><loc>https://scifaro.com/en/abs/bytecover3-accurate-cover-song-identification-on-short-queries-2303.11692</loc><lastmod>2023-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bytecover3-accurate-cover-song-identification-on-short-queries-2303.11692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bytecover3-accurate-cover-song-identification-on-short-queries-2303.11692"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-lightweight-text-to-speech-voice-cloning-with-adaptive-structured-pruning-2303.11816</loc><lastmod>2023-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-lightweight-text-to-speech-voice-cloning-with-adaptive-structured-pruning-2303.11816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-lightweight-text-to-speech-voice-cloning-with-adaptive-structured-pruning-2303.11816"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-turkish-speech-recognition-via-hybrid-ctc-attention-architecture-and-multi-feature-fusion-network-2303.12300</loc><lastmod>2023-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-turkish-speech-recognition-via-hybrid-ctc-attention-architecture-and-multi-feature-fusion-network-2303.12300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-turkish-speech-recognition-via-hybrid-ctc-attention-architecture-and-multi-feature-fusion-network-2303.12300"/></url>
<url><loc>https://scifaro.com/en/abs/dual-quaternions-theory-and-applications-in-sound-2303.12692</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-quaternions-theory-and-applications-in-sound-2303.12692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-quaternions-theory-and-applications-in-sound-2303.12692"/></url>
<url><loc>https://scifaro.com/en/abs/lmcodec-a-low-bitrate-speech-codec-with-causal-transformer-models-2303.12984</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lmcodec-a-low-bitrate-speech-codec-with-causal-transformer-models-2303.12984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lmcodec-a-low-bitrate-speech-codec-with-causal-transformer-models-2303.12984"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-universal-transformer-block-reusing-with-adaptor-in-transformer-for-automatic-speech-recognition-2303.13072</loc><lastmod>2023-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-universal-transformer-block-reusing-with-adaptor-in-transformer-for-automatic-speech-recognition-2303.13072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-universal-transformer-block-reusing-with-adaptor-in-transformer-for-automatic-speech-recognition-2303.13072"/></url>
<url><loc>https://scifaro.com/en/abs/frame-level-multi-label-playing-technique-detection-using-multi-scale-network-and-self-attention-mechanism-2303.13272</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-level-multi-label-playing-technique-detection-using-multi-scale-network-and-self-attention-mechanism-2303.13272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-level-multi-label-playing-technique-detection-using-multi-scale-network-and-self-attention-mechanism-2303.13272"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-audio-diffusion-models-text-to-speech-synthesis-and-enhancement-in-generative-ai-2303.13336</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-audio-diffusion-models-text-to-speech-synthesis-and-enhancement-in-generative-ai-2303.13336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-audio-diffusion-models-text-to-speech-synthesis-and-enhancement-in-generative-ai-2303.13336"/></url>
<url><loc>https://scifaro.com/en/abs/in-depth-analysis-of-music-structure-as-a-text-network-2303.13631</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-depth-analysis-of-music-structure-as-a-text-network-2303.13631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-depth-analysis-of-music-structure-as-a-text-network-2303.13631"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-structure-analysis-with-graph-representations-and-changepoint-detection-methods-2303.13881</loc><lastmod>2023-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-structure-analysis-with-graph-representations-and-changepoint-detection-methods-2303.13881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-structure-analysis-with-graph-representations-and-changepoint-detection-methods-2303.13881"/></url>
<url><loc>https://scifaro.com/en/abs/wave-u-net-discriminator-fast-and-lightweight-discriminator-for-generative-adversarial-network-based-speech-synthesis-2303.13909</loc><lastmod>2023-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wave-u-net-discriminator-fast-and-lightweight-discriminator-for-generative-adversarial-network-based-speech-synthesis-2303.13909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wave-u-net-discriminator-fast-and-lightweight-discriminator-for-generative-adversarial-network-based-speech-synthesis-2303.13909"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speech-enhancement-assisted-by-multi-resolution-frequency-encoder-and-decoder-2303.14593</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-assisted-by-multi-resolution-frequency-encoder-and-decoder-2303.14593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-assisted-by-multi-resolution-frequency-encoder-and-decoder-2303.14593"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-environmental-sound-classification-using-diffusion-probabilistic-model-with-top-k-selection-discriminator-2303.15161</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-environmental-sound-classification-using-diffusion-probabilistic-model-with-top-k-selection-discriminator-2303.15161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-environmental-sound-classification-using-diffusion-probabilistic-model-with-top-k-selection-discriminator-2303.15161"/></url>
<url><loc>https://scifaro.com/en/abs/pitchclass2vec-symbolic-music-structure-segmentation-with-chord-embeddings-2303.15306</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitchclass2vec-symbolic-music-structure-segmentation-with-chord-embeddings-2303.15306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitchclass2vec-symbolic-music-structure-segmentation-with-chord-embeddings-2303.15306"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-background-music-for-a-fighting-game-a-multi-instrument-volume-modulation-approach-2303.15734</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-background-music-for-a-fighting-game-a-multi-instrument-volume-modulation-approach-2303.15734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-background-music-for-a-fighting-game-a-multi-instrument-volume-modulation-approach-2303.15734"/></url>
<url><loc>https://scifaro.com/en/abs/transaudio-towards-the-transferable-adversarial-audio-attack-via-learning-contextualized-perturbations-2303.15940</loc><lastmod>2023-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transaudio-towards-the-transferable-adversarial-audio-attack-via-learning-contextualized-perturbations-2303.15940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transaudio-towards-the-transferable-adversarial-audio-attack-via-learning-contextualized-perturbations-2303.15940"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-anomaly-detection-and-localization-of-machine-audio-a-gan-based-approach-2303.17949</loc><lastmod>2023-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-anomaly-detection-and-localization-of-machine-audio-a-gan-based-approach-2303.17949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-anomaly-detection-and-localization-of-machine-audio-a-gan-based-approach-2303.17949"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-compression-framework-for-efficient-speech-driven-talking-face-generation-2304.00471</loc><lastmod>2023-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-compression-framework-for-efficient-speech-driven-talking-face-generation-2304.00471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-compression-framework-for-efficient-speech-driven-talking-face-generation-2304.00471"/></url>
<url><loc>https://scifaro.com/en/abs/musical-creativity-enabled-by-nonlinear-oscillations-of-a-bubble-in-water-2304.00822</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-creativity-enabled-by-nonlinear-oscillations-of-a-bubble-in-water-2304.00822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-creativity-enabled-by-nonlinear-oscillations-of-a-bubble-in-water-2304.00822"/></url>
<url><loc>https://scifaro.com/en/abs/audit-audio-editing-by-following-instructions-with-latent-diffusion-models-2304.00830</loc><lastmod>2023-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audit-audio-editing-by-following-instructions-with-latent-diffusion-models-2304.00830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audit-audio-editing-by-following-instructions-with-latent-diffusion-models-2304.00830"/></url>
<url><loc>https://scifaro.com/en/abs/designing-and-evaluating-speech-emotion-recognition-systems-a-reality-check-case-study-with-iemocap-2304.00860</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/designing-and-evaluating-speech-emotion-recognition-systems-a-reality-check-case-study-with-iemocap-2304.00860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/designing-and-evaluating-speech-emotion-recognition-systems-a-reality-check-case-study-with-iemocap-2304.00860"/></url>
<url><loc>https://scifaro.com/en/abs/lipsfus-a-neuromorphic-dataset-for-audio-visual-sensory-fusion-of-lip-reading-2304.01080</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipsfus-a-neuromorphic-dataset-for-audio-visual-sensory-fusion-of-lip-reading-2304.01080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipsfus-a-neuromorphic-dataset-for-audio-visual-sensory-fusion-of-lip-reading-2304.01080"/></url>
<url><loc>https://scifaro.com/en/abs/dual-attention-neural-transducers-for-efficient-wake-word-spotting-in-speech-recognition-2304.01905</loc><lastmod>2023-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-attention-neural-transducers-for-efficient-wake-word-spotting-in-speech-recognition-2304.01905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-attention-neural-transducers-for-efficient-wake-word-spotting-in-speech-recognition-2304.01905"/></url>
<url><loc>https://scifaro.com/en/abs/pac-hubert-self-supervised-music-source-separation-via-primitive-auditory-clustering-and-hidden-unit-bert-2304.02160</loc><lastmod>2023-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pac-hubert-self-supervised-music-source-separation-via-primitive-auditory-clustering-and-hidden-unit-bert-2304.02160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pac-hubert-self-supervised-music-source-separation-via-primitive-auditory-clustering-and-hidden-unit-bert-2304.02160"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-audio-captioning-transformer-with-patchout-and-text-guidance-2304.02916</loc><lastmod>2023-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-audio-captioning-transformer-with-patchout-and-text-guidance-2304.02916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-audio-captioning-transformer-with-patchout-and-text-guidance-2304.02916"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-reactions-to-music-via-earable-sensing-2304.03295</loc><lastmod>2023-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-reactions-to-music-via-earable-sensing-2304.03295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-reactions-to-music-via-earable-sensing-2304.03295"/></url>
<url><loc>https://scifaro.com/en/abs/dsvae-interpretable-disentangled-representation-for-synthetic-speech-detection-2304.03323</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsvae-interpretable-disentangled-representation-for-synthetic-speech-detection-2304.03323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsvae-interpretable-disentangled-representation-for-synthetic-speech-detection-2304.03323"/></url>
<url><loc>https://scifaro.com/en/abs/on-site-noise-exposure-technique-for-noise-robust-machine-fault-classification-2304.03522</loc><lastmod>2023-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-site-noise-exposure-technique-for-noise-robust-machine-fault-classification-2304.03522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-site-noise-exposure-technique-for-noise-robust-machine-fault-classification-2304.03522"/></url>
<url><loc>https://scifaro.com/en/abs/graph-attention-for-automated-audio-captioning-2304.03586</loc><lastmod>2023-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-attention-for-automated-audio-captioning-2304.03586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-attention-for-automated-audio-captioning-2304.03586"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-using-audio-representation-with-machine-id-based-contrastive-learning-pretraining-2304.03588</loc><lastmod>2023-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-audio-representation-with-machine-id-based-contrastive-learning-pretraining-2304.03588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-audio-representation-with-machine-id-based-contrastive-learning-pretraining-2304.03588"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-st-v2-multipurpose-spoken-language-translation-toolkit-2304.04596</loc><lastmod>2023-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-st-v2-multipurpose-spoken-language-translation-toolkit-2304.04596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-st-v2-multipurpose-spoken-language-translation-toolkit-2304.04596"/></url>
<url><loc>https://scifaro.com/en/abs/in-situ-crack-and-keyhole-pore-detection-in-laser-directed-energy-deposition-through-acoustic-signal-and-deep-learning-2304.04598</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-situ-crack-and-keyhole-pore-detection-in-laser-directed-energy-deposition-through-acoustic-signal-and-deep-learning-2304.04598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-situ-crack-and-keyhole-pore-detection-in-laser-directed-energy-deposition-through-acoustic-signal-and-deep-learning-2304.04598"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-to-speech-translation-with-multiple-tts-targets-2304.04618</loc><lastmod>2023-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-to-speech-translation-with-multiple-tts-targets-2304.04618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-to-speech-translation-with-multiple-tts-targets-2304.04618"/></url>
<url><loc>https://scifaro.com/en/abs/affectmachine-classical-a-novel-system-for-generating-affective-classical-music-2304.04915</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/affectmachine-classical-a-novel-system-for-generating-affective-classical-music-2304.04915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/affectmachine-classical-a-novel-system-for-generating-affective-classical-music-2304.04915"/></url>
<url><loc>https://scifaro.com/en/abs/sim-t-simplify-the-transformer-network-by-multiplexing-technique-for-speech-recognition-2304.04991</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sim-t-simplify-the-transformer-network-by-multiplexing-technique-for-speech-recognition-2304.04991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sim-t-simplify-the-transformer-network-by-multiplexing-technique-for-speech-recognition-2304.04991"/></url>
<url><loc>https://scifaro.com/en/abs/soft-dynamic-time-warping-for-multi-pitch-estimation-and-beyond-2304.05032</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-dynamic-time-warping-for-multi-pitch-estimation-and-beyond-2304.05032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-dynamic-time-warping-for-multi-pitch-estimation-and-beyond-2304.05032"/></url>
<url><loc>https://scifaro.com/en/abs/looking-similar-sounding-different-leveraging-counterfactual-cross-modal-pairs-for-audiovisual-representation-learning-2304.05600</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/looking-similar-sounding-different-leveraging-counterfactual-cross-modal-pairs-for-audiovisual-representation-learning-2304.05600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/looking-similar-sounding-different-leveraging-counterfactual-cross-modal-pairs-for-audiovisual-representation-learning-2304.05600"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-with-cluster-aware-dino-for-high-performance-robust-speaker-verification-2304.05754</loc><lastmod>2023-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-with-cluster-aware-dino-for-high-performance-robust-speaker-verification-2304.05754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-with-cluster-aware-dino-for-high-performance-robust-speaker-verification-2304.05754"/></url>
<url><loc>https://scifaro.com/en/abs/a-phoneme-informed-neural-network-model-for-note-level-singing-transcription-2304.05917</loc><lastmod>2023-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-phoneme-informed-neural-network-model-for-note-level-singing-transcription-2304.05917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-phoneme-informed-neural-network-model-for-note-level-singing-transcription-2304.05917"/></url>
<url><loc>https://scifaro.com/en/abs/pd-adsv-an-automated-diagnosing-system-using-voice-signals-and-hard-voting-ensemble-method-for-parkinson-s-disease-2304.06016</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pd-adsv-an-automated-diagnosing-system-using-voice-signals-and-hard-voting-ensemble-method-for-parkinson-s-disease-2304.06016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pd-adsv-an-automated-diagnosing-system-using-voice-signals-and-hard-voting-ensemble-method-for-parkinson-s-disease-2304.06016"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-coherent-speaking-style-prediction-with-hierarchical-transformers-for-audiobook-speech-synthesis-2304.06359</loc><lastmod>2023-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-coherent-speaking-style-prediction-with-hierarchical-transformers-for-audiobook-speech-synthesis-2304.06359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-coherent-speaking-style-prediction-with-hierarchical-transformers-for-audiobook-speech-synthesis-2304.06359"/></url>
<url><loc>https://scifaro.com/en/abs/level-generation-for-rhythm-vr-games-2304.06809</loc><lastmod>2023-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/level-generation-for-rhythm-vr-games-2304.06809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/level-generation-for-rhythm-vr-games-2304.06809"/></url>
<url><loc>https://scifaro.com/en/abs/tempo-vs-pitch-understanding-self-supervised-tempo-estimation-2304.06868</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tempo-vs-pitch-understanding-self-supervised-tempo-estimation-2304.06868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tempo-vs-pitch-understanding-self-supervised-tempo-estimation-2304.06868"/></url>
<url><loc>https://scifaro.com/en/abs/on-data-sampling-strategies-for-training-neural-network-speech-separation-models-2304.07142</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-data-sampling-strategies-for-training-neural-network-speech-separation-models-2304.07142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-data-sampling-strategies-for-training-neural-network-speech-separation-models-2304.07142"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-meter-tracking-models-to-latin-american-music-2304.07186</loc><lastmod>2023-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-meter-tracking-models-to-latin-american-music-2304.07186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-meter-tracking-models-to-latin-american-music-2304.07186"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-auxiliary-loss-for-metric-learning-in-music-similarity-based-retrieval-and-auto-tagging-2304.07449</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-auxiliary-loss-for-metric-learning-in-music-similarity-based-retrieval-and-auto-tagging-2304.07449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-auxiliary-loss-for-metric-learning-in-music-similarity-based-retrieval-and-auto-tagging-2304.07449"/></url>
<url><loc>https://scifaro.com/en/abs/fast-random-approximation-of-multi-channel-room-impulse-response-2304.08052</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-random-approximation-of-multi-channel-room-impulse-response-2304.08052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-random-approximation-of-multi-channel-room-impulse-response-2304.08052"/></url>
<url><loc>https://scifaro.com/en/abs/physics-inspired-neuroacoustic-computing-based-on-tunable-nonlinear-multiple-scattering-2304.08380</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-inspired-neuroacoustic-computing-based-on-tunable-nonlinear-multiple-scattering-2304.08380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-inspired-neuroacoustic-computing-based-on-tunable-nonlinear-multiple-scattering-2304.08380"/></url>
<url><loc>https://scifaro.com/en/abs/a-voice-disease-detection-method-based-on-mfccs-and-shallow-cnn-2304.08708</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-voice-disease-detection-method-based-on-mfccs-and-shallow-cnn-2304.08708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-voice-disease-detection-method-based-on-mfccs-and-shallow-cnn-2304.08708"/></url>
<url><loc>https://scifaro.com/en/abs/from-words-to-music-a-study-of-subword-tokenization-techniques-in-symbolic-music-generation-2304.08953</loc><lastmod>2023-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-words-to-music-a-study-of-subword-tokenization-techniques-in-symbolic-music-generation-2304.08953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-words-to-music-a-study-of-subword-tokenization-techniques-in-symbolic-music-generation-2304.08953"/></url>
<url><loc>https://scifaro.com/en/abs/cb-conformer-contextual-biasing-conformer-for-biased-word-recognition-2304.09607</loc><lastmod>2023-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cb-conformer-contextual-biasing-conformer-for-biased-word-recognition-2304.09607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cb-conformer-contextual-biasing-conformer-for-biased-word-recognition-2304.09607"/></url>
<url><loc>https://scifaro.com/en/abs/clamp-contrastive-language-music-pre-training-for-cross-modal-symbolic-music-information-retrieval-2304.11029</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clamp-contrastive-language-music-pre-training-for-cross-modal-symbolic-music-information-retrieval-2304.11029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clamp-contrastive-language-music-pre-training-for-cross-modal-symbolic-music-information-retrieval-2304.11029"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-expression-detection-in-spoken-language-employing-machine-learning-algorithms-2304.11040</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-expression-detection-in-spoken-language-employing-machine-learning-algorithms-2304.11040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-expression-detection-in-spoken-language-employing-machine-learning-algorithms-2304.11040"/></url>
<url><loc>https://scifaro.com/en/abs/affective-social-anthropomorphic-intelligent-system-2304.11046</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/affective-social-anthropomorphic-intelligent-system-2304.11046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/affective-social-anthropomorphic-intelligent-system-2304.11046"/></url>
<url><loc>https://scifaro.com/en/abs/using-mobile-data-and-deep-models-to-assess-auditory-verbal-hallucinations-2304.11049</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-mobile-data-and-deep-models-to-assess-auditory-verbal-hallucinations-2304.11049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-mobile-data-and-deep-models-to-assess-auditory-verbal-hallucinations-2304.11049"/></url>
<url><loc>https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-speech-emotion-recognition-2304.11117</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-speech-emotion-recognition-2304.11117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-speech-emotion-recognition-2304.11117"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-toxicity-detection-in-spoken-language-a-transformer-based-approach-for-edge-devices-2304.11408</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-toxicity-detection-in-spoken-language-a-transformer-based-approach-for-edge-devices-2304.11408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-toxicity-detection-in-spoken-language-a-transformer-based-approach-for-edge-devices-2304.11408"/></url>
<url><loc>https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-homophony-music-performance-2304.11521</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-homophony-music-performance-2304.11521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-order-complexity-model-for-aesthetic-quality-assessment-of-homophony-music-performance-2304.11521"/></url>
<url><loc>https://scifaro.com/en/abs/sar-self-supervised-anti-distortion-representation-for-end-to-end-speech-model-2304.11547</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sar-self-supervised-anti-distortion-representation-for-end-to-end-speech-model-2304.11547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sar-self-supervised-anti-distortion-representation-for-end-to-end-speech-model-2304.11547"/></url>
<url><loc>https://scifaro.com/en/abs/sound-based-drone-fault-classification-using-multitask-learning-2304.11708</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-based-drone-fault-classification-using-multitask-learning-2304.11708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-based-drone-fault-classification-using-multitask-learning-2304.11708"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-text-to-speech-synthesis-conditioned-using-self-supervised-speech-representation-model-2304.11976</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-synthesis-conditioned-using-self-supervised-speech-representation-model-2304.11976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-synthesis-conditioned-using-self-supervised-speech-representation-model-2304.11976"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-visual-singing-voice-transcription-based-on-self-supervised-learning-models-2304.12082</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-visual-singing-voice-transcription-based-on-self-supervised-learning-models-2304.12082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-visual-singing-voice-transcription-based-on-self-supervised-learning-models-2304.12082"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-slimmable-networks-for-keyword-spotting-2304.12183</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-slimmable-networks-for-keyword-spotting-2304.12183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-slimmable-networks-for-keyword-spotting-2304.12183"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-strategies-using-contrastive-learning-and-playlist-information-for-music-classification-and-similarity-2304.12257</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-strategies-using-contrastive-learning-and-playlist-information-for-music-classification-and-similarity-2304.12257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-strategies-using-contrastive-learning-and-playlist-information-for-music-classification-and-similarity-2304.12257"/></url>
<url><loc>https://scifaro.com/en/abs/foley-sound-synthesis-at-the-dcase-2023-challenge-2304.12521</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foley-sound-synthesis-at-the-dcase-2023-challenge-2304.12521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foley-sound-synthesis-at-the-dcase-2023-challenge-2304.12521"/></url>
<url><loc>https://scifaro.com/en/abs/gtn-bailando-genre-consistent-long-term-3d-dance-generation-based-on-pre-trained-genre-token-network-2304.12704</loc><lastmod>2023-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gtn-bailando-genre-consistent-long-term-3d-dance-generation-based-on-pre-trained-genre-token-network-2304.12704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gtn-bailando-genre-consistent-long-term-3d-dance-generation-based-on-pre-trained-genre-token-network-2304.12704"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2304.12739</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2304.12739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-representations-of-sound-for-automatic-insect-recognition-2304.12739"/></url>
<url><loc>https://scifaro.com/en/abs/the-accompanion-combining-reactivity-robustness-and-musical-expressivity-in-an-automatic-piano-accompanist-2304.12939</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-accompanion-combining-reactivity-robustness-and-musical-expressivity-in-an-automatic-piano-accompanist-2304.12939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-accompanion-combining-reactivity-robustness-and-musical-expressivity-in-an-automatic-piano-accompanist-2304.12939"/></url>
<url><loc>https://scifaro.com/en/abs/room-dimensions-and-absorption-inference-from-room-transfer-function-via-machine-learning-2304.12993</loc><lastmod>2023-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-dimensions-and-absorption-inference-from-room-transfer-function-via-machine-learning-2304.12993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-dimensions-and-absorption-inference-from-room-transfer-function-via-machine-learning-2304.12993"/></url>
<url><loc>https://scifaro.com/en/abs/ai-synthesized-voice-detection-using-neural-vocoder-artifacts-2304.13085</loc><lastmod>2023-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-synthesized-voice-detection-using-neural-vocoder-artifacts-2304.13085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-synthesized-voice-detection-using-neural-vocoder-artifacts-2304.13085"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-multi-lingual-vqtts-system-for-limmits-2023-challenge-2304.13121</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-multi-lingual-vqtts-system-for-limmits-2023-challenge-2304.13121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-multi-lingual-vqtts-system-for-limmits-2023-challenge-2304.13121"/></url>
<url><loc>https://scifaro.com/en/abs/xai-based-comparison-of-input-representations-for-audio-event-classification-2304.14019</loc><lastmod>2023-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xai-based-comparison-of-input-representations-for-audio-event-classification-2304.14019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xai-based-comparison-of-input-representations-for-audio-event-classification-2304.14019"/></url>
<url><loc>https://scifaro.com/en/abs/deep-transfer-learning-for-automatic-speech-recognition-towards-better-generalization-2304.14535</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-transfer-learning-for-automatic-speech-recognition-towards-better-generalization-2304.14535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-transfer-learning-for-automatic-speech-recognition-towards-better-generalization-2304.14535"/></url>
<url><loc>https://scifaro.com/en/abs/musical-voice-separation-as-link-prediction-modeling-a-musical-perception-task-as-a-multi-trajectory-tracking-problem-2304.14848</loc><lastmod>2023-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-voice-separation-as-link-prediction-modeling-a-musical-perception-task-as-a-multi-trajectory-tracking-problem-2304.14848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-voice-separation-as-link-prediction-modeling-a-musical-perception-task-as-a-multi-trajectory-tracking-problem-2304.14848"/></url>
<url><loc>https://scifaro.com/en/abs/the-acm-multimedia-2023-computational-paralinguistics-challenge-emotion-share-requests-2304.14882</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-acm-multimedia-2023-computational-paralinguistics-challenge-emotion-share-requests-2304.14882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-acm-multimedia-2023-computational-paralinguistics-challenge-emotion-share-requests-2304.14882"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-representation-learning-for-robust-privacy-preservation-in-audio-2305.00011</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-representation-learning-for-robust-privacy-preservation-in-audio-2305.00011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-representation-learning-for-robust-privacy-preservation-in-audio-2305.00011"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-in-air-traffic-control-by-sentence-level-language-identification-2305.00170</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-in-air-traffic-control-by-sentence-level-language-identification-2305.00170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-in-air-traffic-control-by-sentence-level-language-identification-2305.00170"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-synthesis-from-vocal-imitations-and-sound-event-labels-2305.00302</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-synthesis-from-vocal-imitations-and-sound-event-labels-2305.00302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-synthesis-from-vocal-imitations-and-sound-event-labels-2305.00302"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-sequence-labeling-for-audio-classification-based-on-mfccs-2305.00417</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-sequence-labeling-for-audio-classification-based-on-mfccs-2305.00417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-sequence-labeling-for-audio-classification-based-on-mfccs-2305.00417"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-of-knowledge-among-instruments-in-automatic-music-transcription-2305.00426</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-of-knowledge-among-instruments-in-automatic-music-transcription-2305.00426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-of-knowledge-among-instruments-in-automatic-music-transcription-2305.00426"/></url>
<url><loc>https://scifaro.com/en/abs/emotions-beyond-words-non-speech-audio-emotion-recognition-with-edge-computing-2305.00725</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotions-beyond-words-non-speech-audio-emotion-recognition-with-edge-computing-2305.00725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotions-beyond-words-non-speech-audio-emotion-recognition-with-edge-computing-2305.00725"/></url>
<url><loc>https://scifaro.com/en/abs/cryceleb-a-speaker-verification-dataset-based-on-infant-cry-sounds-2305.00969</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cryceleb-a-speaker-verification-dataset-based-on-infant-cry-sounds-2305.00969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cryceleb-a-speaker-verification-dataset-based-on-infant-cry-sounds-2305.00969"/></url>
<url><loc>https://scifaro.com/en/abs/loopy-a-research-friendly-mix-framework-for-music-information-retrieval-on-electronic-dance-music-2305.01051</loc><lastmod>2023-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loopy-a-research-friendly-mix-framework-for-music-information-retrieval-on-electronic-dance-music-2305.01051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loopy-a-research-friendly-mix-framework-for-music-information-retrieval-on-electronic-dance-music-2305.01051"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-speech-mixup-for-low-resource-keyword-spotting-2305.01170</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-speech-mixup-for-low-resource-keyword-spotting-2305.01170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-speech-mixup-for-low-resource-keyword-spotting-2305.01170"/></url>
<url><loc>https://scifaro.com/en/abs/long-term-rhythmic-video-soundtracker-2305.01319</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-term-rhythmic-video-soundtracker-2305.01319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-term-rhythmic-video-soundtracker-2305.01319"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-multimodal-with-two-phase-training-strategy-for-daily-life-video-classification-2305.01476</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-multimodal-with-two-phase-training-strategy-for-daily-life-video-classification-2305.01476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-multimodal-with-two-phase-training-strategy-for-daily-life-video-classification-2305.01476"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-in-audio-captioning-a-sentence-embedding-regression-loss-acts-as-a-regularizer-2305.01482</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-in-audio-captioning-a-sentence-embedding-regression-loss-acts-as-a-regularizer-2305.01482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-in-audio-captioning-a-sentence-embedding-regression-loss-acts-as-a-regularizer-2305.01482"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-infant-cry-analysis-2305.01578</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-infant-cry-analysis-2305.01578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-infant-cry-analysis-2305.01578"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-spoken-instructions-into-flight-trajectory-prediction-to-optimize-automation-in-air-traffic-control-2305.01661</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-spoken-instructions-into-flight-trajectory-prediction-to-optimize-automation-in-air-traffic-control-2305.01661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-spoken-instructions-into-flight-trajectory-prediction-to-optimize-automation-in-air-traffic-control-2305.01661"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-improvement-of-audio-text-cross-modal-representations-2305.01864</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-improvement-of-audio-text-cross-modal-representations-2305.01864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-improvement-of-audio-text-cross-modal-representations-2305.01864"/></url>
<url><loc>https://scifaro.com/en/abs/diverse-and-vivid-sound-generation-from-text-descriptions-2305.01980</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diverse-and-vivid-sound-generation-from-text-descriptions-2305.01980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diverse-and-vivid-sound-generation-from-text-descriptions-2305.01980"/></url>
<url><loc>https://scifaro.com/en/abs/m2-ctts-end-to-end-multi-scale-multi-modal-conversational-text-to-speech-synthesis-2305.02269</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2-ctts-end-to-end-multi-scale-multi-modal-conversational-text-to-speech-synthesis-2305.02269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2-ctts-end-to-end-multi-scale-multi-modal-conversational-text-to-speech-synthesis-2305.02269"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-detect-novel-and-fine-grained-acoustic-sequences-using-pretrained-audio-representations-2305.02382</loc><lastmod>2023-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-detect-novel-and-fine-grained-acoustic-sequences-using-pretrained-audio-representations-2305.02382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-detect-novel-and-fine-grained-acoustic-sequences-using-pretrained-audio-representations-2305.02382"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-codec-group-residual-vector-quantization-for-high-fidelity-audio-codec-2305.02765</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-codec-group-residual-vector-quantization-for-high-fidelity-audio-codec-2305.02765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-codec-group-residual-vector-quantization-for-high-fidelity-audio-codec-2305.02765"/></url>
<url><loc>https://scifaro.com/en/abs/compressing-audio-cnns-with-graph-centrality-based-filter-pruning-2305.03391</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compressing-audio-cnns-with-graph-centrality-based-filter-pruning-2305.03391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compressing-audio-cnns-with-graph-centrality-based-filter-pruning-2305.03391"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-softly-masked-language-modelling-for-controllable-symbolic-music-generation-2305.03530</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-softly-masked-language-modelling-for-controllable-symbolic-music-generation-2305.03530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-softly-masked-language-modelling-for-controllable-symbolic-music-generation-2305.03530"/></url>
<url><loc>https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-audiovisual-speech-emotion-recognition-2305.03568</loc><lastmod>2025-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-audiovisual-speech-emotion-recognition-2305.03568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-vector-quantized-masked-autoencoder-for-audiovisual-speech-emotion-recognition-2305.03568"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-dynamical-variational-autoencoder-for-audiovisual-speech-representation-learning-2305.03582</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-dynamical-variational-autoencoder-for-audiovisual-speech-representation-learning-2305.03582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-dynamical-variational-autoencoder-for-audiovisual-speech-representation-learning-2305.03582"/></url>
<url><loc>https://scifaro.com/en/abs/physics-based-acoustic-holograms-2305.03625</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-based-acoustic-holograms-2305.03625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-based-acoustic-holograms-2305.03625"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-estimation-by-denoising-preprocessor-and-hybrid-estimation-model-2305.03982</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-estimation-by-denoising-preprocessor-and-hybrid-estimation-model-2305.03982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-estimation-by-denoising-preprocessor-and-hybrid-estimation-model-2305.03982"/></url>
<url><loc>https://scifaro.com/en/abs/a-method-for-analyzing-sampling-jitter-in-audio-equipment-2305.04531</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-method-for-analyzing-sampling-jitter-in-audio-equipment-2305.04531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-method-for-analyzing-sampling-jitter-in-audio-equipment-2305.04531"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-cough-audio-with-gan-for-covid-19-detection-2305.04810</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-cough-audio-with-gan-for-covid-19-detection-2305.04810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-cough-audio-with-gan-for-covid-19-detection-2305.04810"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-convolution-network-based-onset-detection-and-query-by-humming-system-design-2305.05139</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-convolution-network-based-onset-detection-and-query-by-humming-system-design-2305.05139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-convolution-network-based-onset-detection-and-query-by-humming-system-design-2305.05139"/></url>
<url><loc>https://scifaro.com/en/abs/who-is-speaking-actually-robust-and-versatile-speaker-traceability-for-voice-conversion-2305.05152</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-is-speaking-actually-robust-and-versatile-speaker-traceability-for-voice-conversion-2305.05152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-is-speaking-actually-robust-and-versatile-speaker-traceability-for-voice-conversion-2305.05152"/></url>
<url><loc>https://scifaro.com/en/abs/joint-multi-scale-cross-lingual-speaking-style-transfer-with-bidirectional-attention-mechanism-for-automatic-dubbing-2305.05203</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-multi-scale-cross-lingual-speaking-style-transfer-with-bidirectional-attention-mechanism-for-automatic-dubbing-2305.05203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-multi-scale-cross-lingual-speaking-style-transfer-with-bidirectional-attention-mechanism-for-automatic-dubbing-2305.05203"/></url>
<url><loc>https://scifaro.com/en/abs/learn-to-sing-by-listening-building-controllable-virtual-singer-by-unsupervised-learning-from-voice-recordings-2305.05401</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn-to-sing-by-listening-building-controllable-virtual-singer-by-unsupervised-learning-from-voice-recordings-2305.05401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn-to-sing-by-listening-building-controllable-virtual-singer-by-unsupervised-learning-from-voice-recordings-2305.05401"/></url>
<url><loc>https://scifaro.com/en/abs/audioslots-a-slot-centric-generative-model-for-audio-separation-2305.05591</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioslots-a-slot-centric-generative-model-for-audio-separation-2305.05591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioslots-a-slot-centric-generative-model-for-audio-separation-2305.05591"/></url>
<url><loc>https://scifaro.com/en/abs/inter-subnet-speech-enhancement-with-subband-interaction-2305.05599</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inter-subnet-speech-enhancement-with-subband-interaction-2305.05599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inter-subnet-speech-enhancement-with-subband-interaction-2305.05599"/></url>
<url><loc>https://scifaro.com/en/abs/vsmask-defending-against-voice-synthesis-attack-via-real-time-predictive-perturbation-2305.05736</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vsmask-defending-against-voice-synthesis-attack-via-real-time-predictive-perturbation-2305.05736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vsmask-defending-against-voice-synthesis-attack-via-real-time-predictive-perturbation-2305.05736"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-gappy-speech-audio-signals-with-generative-adversarial-networks-2305.05780</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-gappy-speech-audio-signals-with-generative-adversarial-networks-2305.05780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-gappy-speech-audio-signals-with-generative-adversarial-networks-2305.05780"/></url>
<url><loc>https://scifaro.com/en/abs/mispronunciation-detection-of-basic-quranic-recitation-rules-using-deep-learning-2305.06429</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mispronunciation-detection-of-basic-quranic-recitation-rules-using-deep-learning-2305.06429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mispronunciation-detection-of-basic-quranic-recitation-rules-using-deep-learning-2305.06429"/></url>
<url><loc>https://scifaro.com/en/abs/v2meow-meowing-to-the-visual-beat-via-video-to-music-generation-2305.06594</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/v2meow-meowing-to-the-visual-beat-via-video-to-music-generation-2305.06594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/v2meow-meowing-to-the-visual-beat-via-video-to-music-generation-2305.06594"/></url>
<url><loc>https://scifaro.com/en/abs/extending-audio-masked-autoencoders-toward-audio-restoration-2305.06701</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-audio-masked-autoencoders-toward-audio-restoration-2305.06701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-audio-masked-autoencoders-toward-audio-restoration-2305.06701"/></url>
<url><loc>https://scifaro.com/en/abs/happyquokka-system-for-icassp-2023-auditory-eeg-challenge-2305.06806</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/happyquokka-system-for-icassp-2023-auditory-eeg-challenge-2305.06806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/happyquokka-system-for-icassp-2023-auditory-eeg-challenge-2305.06806"/></url>
<url><loc>https://scifaro.com/en/abs/comospeech-one-step-speech-and-singing-voice-synthesis-via-consistency-model-2305.06908</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comospeech-one-step-speech-and-singing-voice-synthesis-via-consistency-model-2305.06908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comospeech-one-step-speech-and-singing-voice-synthesis-via-consistency-model-2305.06908"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-interpretability-in-audio-classification-networks-with-non-negative-matrix-factorization-2305.07132</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-interpretability-in-audio-classification-networks-with-non-negative-matrix-factorization-2305.07132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-interpretability-in-audio-classification-networks-with-non-negative-matrix-factorization-2305.07132"/></url>
<url><loc>https://scifaro.com/en/abs/transavs-end-to-end-audio-visual-segmentation-with-transformer-2305.07223</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transavs-end-to-end-audio-visual-segmentation-with-transformer-2305.07223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transavs-end-to-end-audio-visual-segmentation-with-transformer-2305.07223"/></url>
<url><loc>https://scifaro.com/en/abs/better-speech-synthesis-through-scaling-2305.07243</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/better-speech-synthesis-through-scaling-2305.07243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/better-speech-synthesis-through-scaling-2305.07243"/></url>
<url><loc>https://scifaro.com/en/abs/music-rearrangement-using-hierarchical-segmentation-2305.07347</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-rearrangement-using-hierarchical-segmentation-2305.07347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-rearrangement-using-hierarchical-segmentation-2305.07347"/></url>
<url><loc>https://scifaro.com/en/abs/universal-source-separation-with-weakly-labelled-data-2305.07447</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-source-separation-with-weakly-labelled-data-2305.07447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-source-separation-with-weakly-labelled-data-2305.07447"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarks-and-leaderboards-for-sound-demixing-tasks-2305.07489</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarks-and-leaderboards-for-sound-demixing-tasks-2305.07489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarks-and-leaderboards-for-sound-demixing-tasks-2305.07489"/></url>
<url><loc>https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-via-impulse-response-augmentation-2305.07499</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-via-impulse-response-augmentation-2305.07499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-via-impulse-response-augmentation-2305.07499"/></url>
<url><loc>https://scifaro.com/en/abs/masked-audio-text-encoders-are-effective-multi-modal-rescorers-2305.07677</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-audio-text-encoders-are-effective-multi-modal-rescorers-2305.07677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-audio-text-encoders-are-effective-multi-modal-rescorers-2305.07677"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-discussion-on-dcase-2023-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2305.07828</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2023-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2305.07828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2023-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2305.07828"/></url>
<url><loc>https://scifaro.com/en/abs/higher-order-frequency-modulation-synthesis-2305.07909</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/higher-order-frequency-modulation-synthesis-2305.07909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/higher-order-frequency-modulation-synthesis-2305.07909"/></url>
<url><loc>https://scifaro.com/en/abs/apnet-an-all-frame-level-neural-vocoder-incorporating-direct-prediction-of-amplitude-and-phase-spectra-2305.07952</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apnet-an-all-frame-level-neural-vocoder-incorporating-direct-prediction-of-amplitude-and-phase-spectra-2305.07952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apnet-an-all-frame-level-neural-vocoder-incorporating-direct-prediction-of-amplitude-and-phase-spectra-2305.07952"/></url>
<url><loc>https://scifaro.com/en/abs/sound-to-vibration-transformation-for-sensorless-motor-health-monitoring-2305.07960</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-to-vibration-transformation-for-sensorless-motor-health-monitoring-2305.07960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-to-vibration-transformation-for-sensorless-motor-health-monitoring-2305.07960"/></url>
<url><loc>https://scifaro.com/en/abs/remast-real-time-emotion-based-music-arrangement-with-soft-transition-2305.08029</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remast-real-time-emotion-based-music-arrangement-with-soft-transition-2305.08029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remast-real-time-emotion-based-music-arrangement-with-soft-transition-2305.08029"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-neural-factor-analysis-for-disentangling-utterance-level-speech-representations-2305.08099</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-neural-factor-analysis-for-disentangling-utterance-level-speech-representations-2305.08099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-neural-factor-analysis-for-disentangling-utterance-level-speech-representations-2305.08099"/></url>
<url><loc>https://scifaro.com/en/abs/forknet-simultaneous-time-and-time-frequency-domain-modeling-for-speech-enhancement-2305.08292</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/forknet-simultaneous-time-and-time-frequency-domain-modeling-for-speech-enhancement-2305.08292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/forknet-simultaneous-time-and-time-frequency-domain-modeling-for-speech-enhancement-2305.08292"/></url>
<url><loc>https://scifaro.com/en/abs/ripple-sparse-self-attention-for-monaural-speech-enhancement-2305.08541</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ripple-sparse-self-attention-for-monaural-speech-enhancement-2305.08541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ripple-sparse-self-attention-for-monaural-speech-enhancement-2305.08541"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-speaker-disentanglement-using-unannotated-external-data-for-self-supervised-representation-based-voice-conversion-2305.09167</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-speaker-disentanglement-using-unannotated-external-data-for-self-supervised-representation-based-voice-conversion-2305.09167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-speaker-disentanglement-using-unannotated-external-data-for-self-supervised-representation-based-voice-conversion-2305.09167"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-using-teacher-student-scheme-and-multiple-spectrograms-2305.09463</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-using-teacher-student-scheme-and-multiple-spectrograms-2305.09463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-deep-learning-frameworks-for-acoustic-scene-classification-using-teacher-student-scheme-and-multiple-spectrograms-2305.09463"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-diffusion-probabilistic-models-for-symbolic-music-generation-2305.09489</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-diffusion-probabilistic-models-for-symbolic-music-generation-2305.09489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-diffusion-probabilistic-models-for-symbolic-music-generation-2305.09489"/></url>
<url><loc>https://scifaro.com/en/abs/robust-and-lightweight-audio-fingerprint-for-automatic-content-recognition-2305.09559</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-and-lightweight-audio-fingerprint-for-automatic-content-recognition-2305.09559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-and-lightweight-audio-fingerprint-for-automatic-content-recognition-2305.09559"/></url>
<url><loc>https://scifaro.com/en/abs/soundstorm-efficient-parallel-audio-generation-2305.09636</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundstorm-efficient-parallel-audio-generation-2305.09636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundstorm-efficient-parallel-audio-generation-2305.09636"/></url>
<url><loc>https://scifaro.com/en/abs/a-whisper-transformer-for-audio-captioning-trained-with-synthetic-captions-and-transfer-learning-2305.09690</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-whisper-transformer-for-audio-captioning-trained-with-synthetic-captions-and-transfer-learning-2305.09690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-whisper-transformer-for-audio-captioning-trained-with-synthetic-captions-and-transfer-learning-2305.09690"/></url>
<url><loc>https://scifaro.com/en/abs/ml-superb-multilingual-speech-universal-performance-benchmark-2305.10615</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ml-superb-multilingual-speech-universal-performance-benchmark-2305.10615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ml-superb-multilingual-speech-universal-performance-benchmark-2305.10615"/></url>
<url><loc>https://scifaro.com/en/abs/zeroprompt-streaming-acoustic-encoders-are-zero-shot-masked-lms-2305.10649</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zeroprompt-streaming-acoustic-encoders-are-zero-shot-masked-lms-2305.10649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zeroprompt-streaming-acoustic-encoders-are-zero-shot-masked-lms-2305.10649"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-based-on-contrastive-learning-and-deep-modularization-2305.10652</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-based-on-contrastive-learning-and-deep-modularization-2305.10652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-based-on-contrastive-learning-and-deep-modularization-2305.10652"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-and-reliable-confidence-estimation-based-on-non-autoregressive-end-to-end-speech-recognition-system-2305.10680</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-and-reliable-confidence-estimation-based-on-non-autoregressive-end-to-end-speech-recognition-system-2305.10680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-and-reliable-confidence-estimation-based-on-non-autoregressive-end-to-end-speech-recognition-system-2305.10680"/></url>
<url><loc>https://scifaro.com/en/abs/rmssinger-realistic-music-score-based-singing-voice-synthesis-2305.10686</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rmssinger-realistic-music-score-based-singing-voice-synthesis-2305.10686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rmssinger-realistic-music-score-based-singing-voice-synthesis-2305.10686"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-encoder-decoder-network-for-end-to-end-neural-speaker-diarization-with-target-speaker-attractor-2305.10704</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-encoder-decoder-network-for-end-to-end-neural-speaker-diarization-with-target-speaker-attractor-2305.10704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-encoder-decoder-network-for-end-to-end-neural-speaker-diarization-with-target-speaker-attractor-2305.10704"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-joint-generative-and-predictive-decoders-2305.10734</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-joint-generative-and-predictive-decoders-2305.10734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-joint-generative-and-predictive-decoders-2305.10734"/></url>
<url><loc>https://scifaro.com/en/abs/noise-aware-speech-separation-with-contrastive-learning-2305.10761</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-aware-speech-separation-with-contrastive-learning-2305.10761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-aware-speech-separation-with-contrastive-learning-2305.10761"/></url>
<url><loc>https://scifaro.com/en/abs/clapspeech-learning-prosody-from-text-context-with-contrastive-language-audio-pre-training-2305.10763</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clapspeech-learning-prosody-from-text-context-with-contrastive-language-audio-pre-training-2305.10763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clapspeech-learning-prosody-from-text-context-with-contrastive-language-audio-pre-training-2305.10763"/></url>
<url><loc>https://scifaro.com/en/abs/dq-whisper-joint-distillation-and-quantization-for-efficient-multilingual-speech-recognition-2305.10788</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dq-whisper-joint-distillation-and-quantization-for-efficient-multilingual-speech-recognition-2305.10788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dq-whisper-joint-distillation-and-quantization-for-efficient-multilingual-speech-recognition-2305.10788"/></url>
<url><loc>https://scifaro.com/en/abs/validation-of-an-ecapa-tdnn-system-for-forensic-automatic-speaker-recognition-under-case-work-conditions-2305.10805</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/validation-of-an-ecapa-tdnn-system-for-forensic-automatic-speaker-recognition-under-case-work-conditions-2305.10805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/validation-of-an-ecapa-tdnn-system-for-forensic-automatic-speaker-recognition-under-case-work-conditions-2305.10805"/></url>
<url><loc>https://scifaro.com/en/abs/getmusic-generating-any-music-tracks-with-a-unified-representation-and-diffusion-framework-2305.10841</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/getmusic-generating-any-music-tracks-with-a-unified-representation-and-diffusion-framework-2305.10841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/getmusic-generating-any-music-tracks-with-a-unified-representation-and-diffusion-framework-2305.10841"/></url>
<url><loc>https://scifaro.com/en/abs/funasr-a-fundamental-end-to-end-speech-recognition-toolkit-2305.11013</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/funasr-a-fundamental-end-to-end-speech-recognition-toolkit-2305.11013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/funasr-a-fundamental-end-to-end-speech-recognition-toolkit-2305.11013"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-multi-channel-separation-and-adaptation-2305.11151</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-multi-channel-separation-and-adaptation-2305.11151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-multi-channel-separation-and-adaptation-2305.11151"/></url>
<url><loc>https://scifaro.com/en/abs/trustser-on-the-trustworthiness-of-fine-tuning-pre-trained-speech-embeddings-for-speech-emotion-recognition-2305.11229</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trustser-on-the-trustworthiness-of-fine-tuning-pre-trained-speech-embeddings-for-speech-emotion-recognition-2305.11229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trustser-on-the-trustworthiness-of-fine-tuning-pre-trained-speech-embeddings-for-speech-emotion-recognition-2305.11229"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-learning-for-text-to-speech-accent-adaptation-2305.11320</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-learning-for-text-to-speech-accent-adaptation-2305.11320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-learning-for-text-to-speech-accent-adaptation-2305.11320"/></url>
<url><loc>https://scifaro.com/en/abs/differentially-private-adapters-for-parameter-efficient-acoustic-modeling-2305.11360</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentially-private-adapters-for-parameter-efficient-acoustic-modeling-2305.11360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentially-private-adapters-for-parameter-efficient-acoustic-modeling-2305.11360"/></url>
<url><loc>https://scifaro.com/en/abs/a-preliminary-study-on-augmenting-speech-emotion-recognition-using-a-diffusion-model-2305.11413</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-preliminary-study-on-augmenting-speech-emotion-recognition-using-a-diffusion-model-2305.11413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-preliminary-study-on-augmenting-speech-emotion-recognition-using-a-diffusion-model-2305.11413"/></url>
<url><loc>https://scifaro.com/en/abs/what-you-hear-is-what-you-see-audio-quality-metrics-from-image-quality-metrics-2305.11582</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-you-hear-is-what-you-see-audio-quality-metrics-from-image-quality-metrics-2305.11582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-you-hear-is-what-you-see-audio-quality-metrics-from-image-quality-metrics-2305.11582"/></url>
<url><loc>https://scifaro.com/en/abs/midi-draw-sketching-to-control-melody-generation-2305.11605</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-draw-sketching-to-control-melody-generation-2305.11605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-draw-sketching-to-control-melody-generation-2305.11605"/></url>
<url><loc>https://scifaro.com/en/abs/sensing-of-inspiration-events-from-speech-comparison-of-deep-learning-and-linguistic-methods-2305.11683</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sensing-of-inspiration-events-from-speech-comparison-of-deep-learning-and-linguistic-methods-2305.11683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sensing-of-inspiration-events-from-speech-comparison-of-deep-learning-and-linguistic-methods-2305.11683"/></url>
<url><loc>https://scifaro.com/en/abs/direction-specific-ambisonics-source-separation-with-end-to-end-deep-learning-2305.11727</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-specific-ambisonics-source-separation-with-end-to-end-deep-learning-2305.11727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-specific-ambisonics-source-separation-with-end-to-end-deep-learning-2305.11727"/></url>
<url><loc>https://scifaro.com/en/abs/mparrottts-multilingual-multi-speaker-text-to-speech-synthesis-in-low-resource-setting-2305.11926</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mparrottts-multilingual-multi-speaker-text-to-speech-synthesis-in-low-resource-setting-2305.11926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mparrottts-multilingual-multi-speaker-text-to-speech-synthesis-in-low-resource-setting-2305.11926"/></url>
<url><loc>https://scifaro.com/en/abs/ee-tts-emphatic-expressive-tts-with-linguistic-information-2305.12107</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ee-tts-emphatic-expressive-tts-with-linguistic-information-2305.12107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ee-tts-emphatic-expressive-tts-with-linguistic-information-2305.12107"/></url>
<url><loc>https://scifaro.com/en/abs/aca-net-towards-lightweight-speaker-verification-using-asymmetric-cross-attention-2305.12121</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aca-net-towards-lightweight-speaker-verification-using-asymmetric-cross-attention-2305.12121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aca-net-towards-lightweight-speaker-verification-using-asymmetric-cross-attention-2305.12121"/></url>
<url><loc>https://scifaro.com/en/abs/comedicspeech-text-to-speech-for-stand-up-comedies-in-low-resource-scenarios-2305.12200</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comedicspeech-text-to-speech-for-stand-up-comedies-in-low-resource-scenarios-2305.12200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comedicspeech-text-to-speech-for-stand-up-comedies-in-low-resource-scenarios-2305.12200"/></url>
<url><loc>https://scifaro.com/en/abs/laughter-synthesis-using-pseudo-phonetic-tokens-with-a-large-scale-in-the-wild-laughter-corpus-2305.12442</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/laughter-synthesis-using-pseudo-phonetic-tokens-with-a-large-scale-in-the-wild-laughter-corpus-2305.12442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/laughter-synthesis-using-pseudo-phonetic-tokens-with-a-large-scale-in-the-wild-laughter-corpus-2305.12442"/></url>
<url><loc>https://scifaro.com/en/abs/jnv-corpus-a-corpus-of-japanese-nonverbal-vocalizations-with-diverse-phrases-and-emotions-2305.12445</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jnv-corpus-a-corpus-of-japanese-nonverbal-vocalizations-with-diverse-phrases-and-emotions-2305.12445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jnv-corpus-a-corpus-of-japanese-nonverbal-vocalizations-with-diverse-phrases-and-emotions-2305.12445"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-gans-for-noisy-speech-simulation-from-clean-speech-2305.12460</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-gans-for-noisy-speech-simulation-from-clean-speech-2305.12460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-gans-for-noisy-speech-simulation-from-clean-speech-2305.12460"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-paralinguistic-assessment-for-real-world-mobile-health-mhealth-monitoring-an-initial-study-of-reverberation-effects-on-speech-2305.12514</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-paralinguistic-assessment-for-real-world-mobile-health-mhealth-monitoring-an-initial-study-of-reverberation-effects-on-speech-2305.12514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-paralinguistic-assessment-for-real-world-mobile-health-mhealth-monitoring-an-initial-study-of-reverberation-effects-on-speech-2305.12514"/></url>
<url><loc>https://scifaro.com/en/abs/the-hccl-system-for-voxceleb-speaker-recognition-challenge-2022-2305.12642</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hccl-system-for-voxceleb-speaker-recognition-challenge-2022-2305.12642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hccl-system-for-voxceleb-speaker-recognition-challenge-2022-2305.12642"/></url>
<url><loc>https://scifaro.com/en/abs/more-perspectives-mean-better-underwater-target-recognition-and-localization-with-multimodal-data-via-symbiotic-transformer-and-multiview-regression-2305.12701</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/more-perspectives-mean-better-underwater-target-recognition-and-localization-with-multimodal-data-via-symbiotic-transformer-and-multiview-regression-2305.12701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/more-perspectives-mean-better-underwater-target-recognition-and-localization-with-multimodal-data-via-symbiotic-transformer-and-multiview-regression-2305.12701"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-sub-graph-clustering-algorithm-for-semi-supervised-domain-adaptation-speaker-verification-2305.12703</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-sub-graph-clustering-algorithm-for-semi-supervised-domain-adaptation-speaker-verification-2305.12703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-sub-graph-clustering-algorithm-for-semi-supervised-domain-adaptation-speaker-verification-2305.12703"/></url>
<url><loc>https://scifaro.com/en/abs/lean-light-and-efficient-audio-classification-network-2305.12712</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lean-light-and-efficient-audio-classification-network-2305.12712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lean-light-and-efficient-audio-classification-network-2305.12712"/></url>
<url><loc>https://scifaro.com/en/abs/gncformer-enhanced-self-attention-for-automatic-speech-recognition-2305.12755</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gncformer-enhanced-self-attention-for-automatic-speech-recognition-2305.12755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gncformer-enhanced-self-attention-for-automatic-speech-recognition-2305.12755"/></url>
<url><loc>https://scifaro.com/en/abs/the-defender-s-perspective-on-automatic-speaker-verification-an-overview-2305.12804</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-defender-s-perspective-on-automatic-speaker-verification-an-overview-2305.12804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-defender-s-perspective-on-automatic-speaker-verification-an-overview-2305.12804"/></url>
<url><loc>https://scifaro.com/en/abs/nas-fm-neural-architecture-search-for-tunable-and-interpretable-sound-synthesis-based-on-frequency-modulation-2305.12868</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nas-fm-neural-architecture-search-for-tunable-and-interpretable-sound-synthesis-based-on-frequency-modulation-2305.12868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nas-fm-neural-architecture-search-for-tunable-and-interpretable-sound-synthesis-based-on-frequency-modulation-2305.12868"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generalizing-deep-audio-fake-detection-networks-2305.13033</loc><lastmod>2024-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generalizing-deep-audio-fake-detection-networks-2305.13033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generalizing-deep-audio-fake-detection-networks-2305.13033"/></url>
<url><loc>https://scifaro.com/en/abs/audiotoken-adaptation-of-text-conditioned-diffusion-models-for-audio-to-image-generation-2305.13050</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiotoken-adaptation-of-text-conditioned-diffusion-models-for-audio-to-image-generation-2305.13050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiotoken-adaptation-of-text-conditioned-diffusion-models-for-audio-to-image-generation-2305.13050"/></url>
<url><loc>https://scifaro.com/en/abs/u-dit-tts-u-diffusion-vision-transformer-for-text-to-speech-2305.13195</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-dit-tts-u-diffusion-vision-transformer-for-text-to-speech-2305.13195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-dit-tts-u-diffusion-vision-transformer-for-text-to-speech-2305.13195"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-detect-an-animal-sound-from-five-examples-2305.13210</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-detect-an-animal-sound-from-five-examples-2305.13210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-detect-an-animal-sound-from-five-examples-2305.13210"/></url>
<url><loc>https://scifaro.com/en/abs/computational-models-of-sound-quality-metrics-using-method-for-calculating-loudness-with-gammatone-gammachirp-auditory-filterbank-2305.13213</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computational-models-of-sound-quality-metrics-using-method-for-calculating-loudness-with-gammatone-gammachirp-auditory-filterbank-2305.13213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computational-models-of-sound-quality-metrics-using-method-for-calculating-loudness-with-gammatone-gammachirp-auditory-filterbank-2305.13213"/></url>
<url><loc>https://scifaro.com/en/abs/modulation-extraction-for-lfo-driven-audio-effects-2305.13262</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulation-extraction-for-lfo-driven-audio-effects-2305.13262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulation-extraction-for-lfo-driven-audio-effects-2305.13262"/></url>
<url><loc>https://scifaro.com/en/abs/fluentspeech-stutter-oriented-automatic-speech-editing-with-context-aware-diffusion-models-2305.13612</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fluentspeech-stutter-oriented-automatic-speech-editing-with-context-aware-diffusion-models-2305.13612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fluentspeech-stutter-oriented-automatic-speech-editing-with-context-aware-diffusion-models-2305.13612"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-cross-dataset-fake-audio-based-on-prosodic-and-pronunciation-features-2305.13700</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-cross-dataset-fake-audio-based-on-prosodic-and-pronunciation-features-2305.13700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-cross-dataset-fake-audio-based-on-prosodic-and-pronunciation-features-2305.13700"/></url>
<url><loc>https://scifaro.com/en/abs/to-rawnet-improving-rawnet-with-tcn-and-orthogonal-regularization-for-fake-audio-detection-2305.13701</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-rawnet-improving-rawnet-with-tcn-and-orthogonal-regularization-for-fake-audio-detection-2305.13701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-rawnet-improving-rawnet-with-tcn-and-orthogonal-regularization-for-fake-audio-detection-2305.13701"/></url>
<url><loc>https://scifaro.com/en/abs/calls-japanese-empathetic-dialogue-speech-corpus-of-complaint-handling-and-attentive-listening-in-customer-center-2305.13713</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calls-japanese-empathetic-dialogue-speech-corpus-of-complaint-handling-and-attentive-listening-in-customer-center-2305.13713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calls-japanese-empathetic-dialogue-speech-corpus-of-complaint-handling-and-attentive-listening-in-customer-center-2305.13713"/></url>
<url><loc>https://scifaro.com/en/abs/ba-sot-boundary-aware-serialized-output-training-for-multi-talker-asr-2305.13716</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ba-sot-boundary-aware-serialized-output-training-for-multi-talker-asr-2305.13716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ba-sot-boundary-aware-serialized-output-training-for-multi-talker-asr-2305.13716"/></url>
<url><loc>https://scifaro.com/en/abs/chatgpt-edss-empathetic-dialogue-speech-synthesis-trained-from-chatgpt-derived-context-word-embeddings-2305.13724</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chatgpt-edss-empathetic-dialogue-speech-synthesis-trained-from-chatgpt-derived-context-word-embeddings-2305.13724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chatgpt-edss-empathetic-dialogue-speech-synthesis-trained-from-chatgpt-derived-context-word-embeddings-2305.13724"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-audio-mixing-methods-for-piano-transcription-in-violin-piano-ensembles-2305.13758</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-audio-mixing-methods-for-piano-transcription-in-violin-piano-ensembles-2305.13758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-audio-mixing-methods-for-piano-transcription-in-violin-piano-ensembles-2305.13758"/></url>
<url><loc>https://scifaro.com/en/abs/add-2023-the-second-audio-deepfake-detection-challenge-2305.13774</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/add-2023-the-second-audio-deepfake-detection-challenge-2305.13774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/add-2023-the-second-audio-deepfake-detection-challenge-2305.13774"/></url>
<url><loc>https://scifaro.com/en/abs/se-bridge-speech-enhancement-with-consistent-brownian-bridge-2305.13796</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/se-bridge-speech-enhancement-with-consistent-brownian-bridge-2305.13796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/se-bridge-speech-enhancement-with-consistent-brownian-bridge-2305.13796"/></url>
<url><loc>https://scifaro.com/en/abs/zet-speech-zero-shot-adaptive-emotion-controllable-text-to-speech-synthesis-with-diffusion-and-style-based-models-2305.13831</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zet-speech-zero-shot-adaptive-emotion-controllable-text-to-speech-synthesis-with-diffusion-and-style-based-models-2305.13831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zet-speech-zero-shot-adaptive-emotion-controllable-text-to-speech-synthesis-with-diffusion-and-style-based-models-2305.13831"/></url>
<url><loc>https://scifaro.com/en/abs/happy-or-evil-laughter-analysing-a-database-of-natural-audio-samples-2305.14023</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/happy-or-evil-laughter-analysing-a-database-of-natural-audio-samples-2305.14023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/happy-or-evil-laughter-analysing-a-database-of-natural-audio-samples-2305.14023"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-differentiable-architecture-search-2305.14402</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-differentiable-architecture-search-2305.14402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-differentiable-architecture-search-2305.14402"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-neural-resonators-2305.14867</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-neural-resonators-2305.14867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-neural-resonators-2305.14867"/></url>
<url><loc>https://scifaro.com/en/abs/iteratively-improving-speech-recognition-and-voice-conversion-2305.15055</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iteratively-improving-speech-recognition-and-voice-conversion-2305.15055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iteratively-improving-speech-recognition-and-voice-conversion-2305.15055"/></url>
<url><loc>https://scifaro.com/en/abs/plcmos-a-data-driven-non-intrusive-metric-for-the-evaluation-of-packet-loss-concealment-algorithms-2305.15127</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plcmos-a-data-driven-non-intrusive-metric-for-the-evaluation-of-packet-loss-concealment-algorithms-2305.15127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plcmos-a-data-driven-non-intrusive-metric-for-the-evaluation-of-packet-loss-concealment-algorithms-2305.15127"/></url>
<url><loc>https://scifaro.com/en/abs/sound-design-strategies-for-latent-audio-space-explorations-using-deep-learning-architectures-2305.15571</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-design-strategies-for-latent-audio-space-explorations-using-deep-learning-architectures-2305.15571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-design-strategies-for-latent-audio-space-explorations-using-deep-learning-architectures-2305.15571"/></url>
<url><loc>https://scifaro.com/en/abs/metamathematics-of-algorithmic-composition-2305.15601</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metamathematics-of-algorithmic-composition-2305.15601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metamathematics-of-algorithmic-composition-2305.15601"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-neural-music-generation-2305.15719</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-neural-music-generation-2305.15719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-neural-music-generation-2305.15719"/></url>
<url><loc>https://scifaro.com/en/abs/towards-solving-cocktail-party-the-first-method-to-build-a-realistic-dataset-with-ground-truths-for-speech-separation-2305.15758</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-solving-cocktail-party-the-first-method-to-build-a-realistic-dataset-with-ground-truths-for-speech-separation-2305.15758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-solving-cocktail-party-the-first-method-to-build-a-realistic-dataset-with-ground-truths-for-speech-separation-2305.15758"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-based-on-sound-separation-2305.15859</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-sound-separation-2305.15859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-sound-separation-2305.15859"/></url>
<url><loc>https://scifaro.com/en/abs/room-impulse-response-estimation-in-a-multiple-source-environment-2305.15898</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-impulse-response-estimation-in-a-multiple-source-environment-2305.15898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-impulse-response-estimation-in-a-multiple-source-environment-2305.15898"/></url>
<url><loc>https://scifaro.com/en/abs/latent-diffusion-model-based-foley-sound-generation-system-for-dcase-challenge-2023-task-7-2305.15905</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-diffusion-model-based-foley-sound-generation-system-for-dcase-challenge-2023-task-7-2305.15905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-diffusion-model-based-foley-sound-generation-system-for-dcase-challenge-2023-task-7-2305.15905"/></url>
<url><loc>https://scifaro.com/en/abs/ordered-and-binary-speaker-embedding-2305.16043</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ordered-and-binary-speaker-embedding-2305.16043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ordered-and-binary-speaker-embedding-2305.16043"/></url>
<url><loc>https://scifaro.com/en/abs/visualizing-data-augmentation-in-deep-speaker-recognition-2305.16070</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizing-data-augmentation-in-deep-speaker-recognition-2305.16070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizing-data-augmentation-in-deep-speaker-recognition-2305.16070"/></url>
<url><loc>https://scifaro.com/en/abs/unified-modeling-of-multi-talker-overlapped-speech-recognition-and-diarization-with-a-sidecar-separator-2305.16263</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-modeling-of-multi-talker-overlapped-speech-recognition-and-diarization-with-a-sidecar-separator-2305.16263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-modeling-of-multi-talker-overlapped-speech-recognition-and-diarization-with-a-sidecar-separator-2305.16263"/></url>
<url><loc>https://scifaro.com/en/abs/betray-oneself-a-novel-audio-deepfake-detection-model-via-mono-to-stereo-conversion-2305.16353</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/betray-oneself-a-novel-audio-deepfake-detection-model-via-mono-to-stereo-conversion-2305.16353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/betray-oneself-a-novel-audio-deepfake-detection-model-via-mono-to-stereo-conversion-2305.16353"/></url>
<url><loc>https://scifaro.com/en/abs/soundsieve-seconds-long-audio-event-recognition-on-intermittently-powered-systems-2305.16445</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundsieve-seconds-long-audio-event-recognition-on-intermittently-powered-systems-2305.16445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundsieve-seconds-long-audio-event-recognition-on-intermittently-powered-systems-2305.16445"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-scale-attentive-transformer-for-multi-instrument-symbolic-music-generation-2305.16592</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-scale-attentive-transformer-for-multi-instrument-symbolic-music-generation-2305.16592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-scale-attentive-transformer-for-multi-instrument-symbolic-music-generation-2305.16592"/></url>
<url><loc>https://scifaro.com/en/abs/diverse-and-expressive-speech-prosody-prediction-with-denoising-diffusion-probabilistic-model-2305.16749</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diverse-and-expressive-speech-prosody-prediction-with-denoising-diffusion-probabilistic-model-2305.16749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diverse-and-expressive-speech-prosody-prediction-with-denoising-diffusion-probabilistic-model-2305.16749"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-state-space-model-approach-to-efficient-speech-separation-2305.16932</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-state-space-model-approach-to-efficient-speech-separation-2305.16932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-state-space-model-approach-to-efficient-speech-separation-2305.16932"/></url>
<url><loc>https://scifaro.com/en/abs/distriblock-identifying-adversarial-audio-samples-by-leveraging-characteristics-of-the-output-distribution-2305.17000</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distriblock-identifying-adversarial-audio-samples-by-leveraging-characteristics-of-the-output-distribution-2305.17000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distriblock-identifying-adversarial-audio-samples-by-leveraging-characteristics-of-the-output-distribution-2305.17000"/></url>
<url><loc>https://scifaro.com/en/abs/spot-keywords-from-very-noisy-and-mixed-speech-2305.17706</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spot-keywords-from-very-noisy-and-mixed-speech-2305.17706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spot-keywords-from-very-noisy-and-mixed-speech-2305.17706"/></url>
<url><loc>https://scifaro.com/en/abs/styles2st-zero-shot-style-transfer-for-direct-speech-to-speech-translation-2305.17732</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styles2st-zero-shot-style-transfer-for-direct-speech-to-speech-translation-2305.17732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styles2st-zero-shot-style-transfer-for-direct-speech-to-speech-translation-2305.17732"/></url>
<url><loc>https://scifaro.com/en/abs/range-based-equal-error-rate-for-spoof-localization-2305.17739</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/range-based-equal-error-rate-for-spoof-localization-2305.17739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/range-based-equal-error-rate-for-spoof-localization-2305.17739"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-inference-and-neural-estimation-of-acoustic-wave-propagation-2305.17749</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-inference-and-neural-estimation-of-acoustic-wave-propagation-2305.17749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-inference-and-neural-estimation-of-acoustic-wave-propagation-2305.17749"/></url>
<url><loc>https://scifaro.com/en/abs/captdure-captioned-sound-dataset-of-single-sources-2305.17758</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/captdure-captioned-sound-dataset-of-single-sources-2305.17758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/captdure-captioned-sound-dataset-of-single-sources-2305.17758"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-audio-transformers-for-online-audio-tagging-2305.17834</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-audio-transformers-for-online-audio-tagging-2305.17834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-audio-transformers-for-online-audio-tagging-2305.17834"/></url>
<url><loc>https://scifaro.com/en/abs/retraining-free-customized-asr-for-enharmonic-words-based-on-a-named-entity-aware-model-and-phoneme-similarity-estimation-2305.17846</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retraining-free-customized-asr-for-enharmonic-words-based-on-a-named-entity-aware-model-and-phoneme-similarity-estimation-2305.17846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retraining-free-customized-asr-for-enharmonic-words-based-on-a-named-entity-aware-model-and-phoneme-similarity-estimation-2305.17846"/></url>
<url><loc>https://scifaro.com/en/abs/speech-and-noise-dual-stream-spectrogram-refine-network-with-speech-distortion-loss-for-robust-speech-recognition-2305.17860</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-and-noise-dual-stream-spectrogram-refine-network-with-speech-distortion-loss-for-robust-speech-recognition-2305.17860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-and-noise-dual-stream-spectrogram-refine-network-with-speech-distortion-loss-for-robust-speech-recognition-2305.17860"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-attention-for-audio-question-answering-2305.17993</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-attention-for-audio-question-answering-2305.17993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-attention-for-audio-question-answering-2305.17993"/></url>
<url><loc>https://scifaro.com/en/abs/adaptermix-exploring-the-efficacy-of-mixture-of-adapters-for-low-resource-tts-adaptation-2305.18028</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptermix-exploring-the-efficacy-of-mixture-of-adapters-for-low-resource-tts-adaptation-2305.18028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptermix-exploring-the-efficacy-of-mixture-of-adapters-for-low-resource-tts-adaptation-2305.18028"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-adaptively-refined-prototypes-2305.18045</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-adaptively-refined-prototypes-2305.18045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-adaptively-refined-prototypes-2305.18045"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-efficient-end-to-end-asr-using-discretized-input-from-self-supervised-learning-2305.18108</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-efficient-end-to-end-asr-using-discretized-input-from-self-supervised-learning-2305.18108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-efficient-end-to-end-asr-using-discretized-input-from-self-supervised-learning-2305.18108"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-membership-inference-attack-for-the-diffusion-model-by-proximal-initialization-2305.18355</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-membership-inference-attack-for-the-diffusion-model-by-proximal-initialization-2305.18355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-membership-inference-attack-for-the-diffusion-model-by-proximal-initialization-2305.18355"/></url>
<url><loc>https://scifaro.com/en/abs/speech-intelligibility-assessment-of-dysarthric-speech-by-using-goodness-of-pronunciation-with-uncertainty-quantification-2305.18392</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-intelligibility-assessment-of-dysarthric-speech-by-using-goodness-of-pronunciation-with-uncertainty-quantification-2305.18392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-intelligibility-assessment-of-dysarthric-speech-by-using-goodness-of-pronunciation-with-uncertainty-quantification-2305.18392"/></url>
<url><loc>https://scifaro.com/en/abs/make-an-audio-2-temporal-enhanced-text-to-audio-generation-2305.18474</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/make-an-audio-2-temporal-enhanced-text-to-audio-generation-2305.18474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/make-an-audio-2-temporal-enhanced-text-to-audio-generation-2305.18474"/></url>
<url><loc>https://scifaro.com/en/abs/building-accurate-low-latency-asr-for-streaming-voice-search-2305.18596</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-accurate-low-latency-asr-for-streaming-voice-search-2305.18596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-accurate-low-latency-asr-for-streaming-voice-search-2305.18596"/></url>
<url><loc>https://scifaro.com/en/abs/e-panns-sound-recognition-using-efficient-pre-trained-audio-neural-networks-2305.18665</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e-panns-sound-recognition-using-efficient-pre-trained-audio-neural-networks-2305.18665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e-panns-sound-recognition-using-efficient-pre-trained-audio-neural-networks-2305.18665"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-temporally-weakly-supervised-training-a-case-study-for-keyword-spotting-2305.18794</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-temporally-weakly-supervised-training-a-case-study-for-keyword-spotting-2305.18794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-temporally-weakly-supervised-training-a-case-study-for-keyword-spotting-2305.18794"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymization-using-orthogonal-householder-neural-network-2305.18823</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymization-using-orthogonal-householder-neural-network-2305.18823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymization-using-orthogonal-householder-neural-network-2305.18823"/></url>
<url><loc>https://scifaro.com/en/abs/pseudo-siamese-network-based-timbre-reserved-black-box-adversarial-attack-in-speaker-identification-2305.19020</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseudo-siamese-network-based-timbre-reserved-black-box-adversarial-attack-in-speaker-identification-2305.19020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseudo-siamese-network-based-timbre-reserved-black-box-adversarial-attack-in-speaker-identification-2305.19020"/></url>
<url><loc>https://scifaro.com/en/abs/adaptation-of-tongue-ultrasound-based-silent-speech-interfaces-using-spatial-transformer-networks-2305.19130</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptation-of-tongue-ultrasound-based-silent-speech-interfaces-using-spatial-transformer-networks-2305.19130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptation-of-tongue-ultrasound-based-silent-speech-interfaces-using-spatial-transformer-networks-2305.19130"/></url>
<url><loc>https://scifaro.com/en/abs/audio-classification-using-ml-methods-2305.19304</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-classification-using-ml-methods-2305.19304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-classification-using-ml-methods-2305.19304"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-audio-visual-learning-framework-for-localization-separation-and-recognition-2305.19458</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-audio-visual-learning-framework-for-localization-separation-and-recognition-2305.19458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-audio-visual-learning-framework-for-localization-separation-and-recognition-2305.19458"/></url>
<url><loc>https://scifaro.com/en/abs/promptstyle-controllable-style-transfer-for-text-to-speech-with-natural-language-descriptions-2305.19522</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/promptstyle-controllable-style-transfer-for-text-to-speech-with-natural-language-descriptions-2305.19522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/promptstyle-controllable-style-transfer-for-text-to-speech-with-natural-language-descriptions-2305.19522"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-automatic-pronunciation-assessment-2305.19563</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-automatic-pronunciation-assessment-2305.19563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-automatic-pronunciation-assessment-2305.19563"/></url>
<url><loc>https://scifaro.com/en/abs/dc-comix-tts-an-end-to-end-expressive-tts-with-discrete-code-collaborated-with-mixer-2305.19567</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dc-comix-tts-an-end-to-end-expressive-tts-with-discrete-code-collaborated-with-mixer-2305.19567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dc-comix-tts-an-end-to-end-expressive-tts-with-discrete-code-collaborated-with-mixer-2305.19567"/></url>
<url><loc>https://scifaro.com/en/abs/svvad-personal-voice-activity-detection-for-speaker-verification-2305.19581</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svvad-personal-voice-activity-detection-for-speaker-verification-2305.19581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svvad-personal-voice-activity-detection-for-speaker-verification-2305.19581"/></url>
<url><loc>https://scifaro.com/en/abs/learning-music-sequence-representation-from-text-supervision-2305.19602</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-music-sequence-representation-from-text-supervision-2305.19602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-music-sequence-representation-from-text-supervision-2305.19602"/></url>
<url><loc>https://scifaro.com/en/abs/intelligible-lip-to-speech-synthesis-with-speech-units-2305.19603</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intelligible-lip-to-speech-synthesis-with-speech-units-2305.19603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intelligible-lip-to-speech-synthesis-with-speech-units-2305.19603"/></url>
<url><loc>https://scifaro.com/en/abs/underwater-art-expanding-information-perspectives-with-text-templates-for-underwater-acoustic-target-recognition-2305.19612</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underwater-art-expanding-information-perspectives-with-text-templates-for-underwater-acoustic-target-recognition-2305.19612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underwater-art-expanding-information-perspectives-with-text-templates-for-underwater-acoustic-target-recognition-2305.19612"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dataset-co-training-with-sharpness-aware-optimization-for-audio-anti-spoofing-2305.19953</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dataset-co-training-with-sharpness-aware-optimization-for-audio-anti-spoofing-2305.19953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dataset-co-training-with-sharpness-aware-optimization-for-audio-anti-spoofing-2305.19953"/></url>
<url><loc>https://scifaro.com/en/abs/unssor-unsupervised-neural-speech-separation-by-leveraging-over-determined-training-mixtures-2305.20054</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unssor-unsupervised-neural-speech-separation-by-leveraging-over-determined-training-mixtures-2305.20054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unssor-unsupervised-neural-speech-separation-by-leveraging-over-determined-training-mixtures-2305.20054"/></url>
<url><loc>https://scifaro.com/en/abs/mert-acoustic-music-understanding-model-with-large-scale-self-supervised-training-2306.00107</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mert-acoustic-music-understanding-model-with-large-scale-self-supervised-training-2306.00107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mert-acoustic-music-understanding-model-with-large-scale-self-supervised-training-2306.00107"/></url>
<url><loc>https://scifaro.com/en/abs/musecoco-generating-symbolic-music-from-text-2306.00110</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musecoco-generating-symbolic-music-from-text-2306.00110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musecoco-generating-symbolic-music-from-text-2306.00110"/></url>
<url><loc>https://scifaro.com/en/abs/speech-inpainting-context-based-speech-synthesis-guided-by-video-2306.00489</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-inpainting-context-based-speech-synthesis-guided-by-video-2306.00489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-inpainting-context-based-speech-synthesis-guided-by-video-2306.00489"/></url>
<url><loc>https://scifaro.com/en/abs/masked-autoencoders-with-multi-window-local-global-attention-are-better-audio-learners-2306.00561</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-autoencoders-with-multi-window-local-global-attention-are-better-audio-learners-2306.00561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-autoencoders-with-multi-window-local-global-attention-are-better-audio-learners-2306.00561"/></url>
<url><loc>https://scifaro.com/en/abs/adaptation-and-optimization-of-automatic-speech-recognition-asr-for-the-maritime-domain-in-the-field-of-vhf-communication-2306.00614</loc><lastmod>2025-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptation-and-optimization-of-automatic-speech-recognition-asr-for-the-maritime-domain-in-the-field-of-vhf-communication-2306.00614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptation-and-optimization-of-automatic-speech-recognition-asr-for-the-maritime-domain-in-the-field-of-vhf-communication-2306.00614"/></url>
<url><loc>https://scifaro.com/en/abs/emomix-emotion-mixing-via-diffusion-models-for-emotional-speech-synthesis-2306.00648</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emomix-emotion-mixing-via-diffusion-models-for-emotional-speech-synthesis-2306.00648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emomix-emotion-mixing-via-diffusion-models-for-emotional-speech-synthesis-2306.00648"/></url>
<url><loc>https://scifaro.com/en/abs/encoder-decoder-multimodal-speaker-change-detection-2306.00680</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/encoder-decoder-multimodal-speaker-change-detection-2306.00680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/encoder-decoder-multimodal-speaker-change-detection-2306.00680"/></url>
<url><loc>https://scifaro.com/en/abs/stuttering-detection-using-speaker-representations-and-self-supervised-contextual-embeddings-2306.00689</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stuttering-detection-using-speaker-representations-and-self-supervised-contextual-embeddings-2306.00689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stuttering-detection-using-speaker-representations-and-self-supervised-contextual-embeddings-2306.00689"/></url>
<url><loc>https://scifaro.com/en/abs/undiff-unsupervised-voice-restoration-with-unconditional-diffusion-model-2306.00721</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/undiff-unsupervised-voice-restoration-with-unconditional-diffusion-model-2306.00721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/undiff-unsupervised-voice-restoration-with-unconditional-diffusion-model-2306.00721"/></url>
<url><loc>https://scifaro.com/en/abs/slothspeech-denial-of-service-attack-against-speech-recognition-models-2306.00794</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slothspeech-denial-of-service-attack-against-speech-recognition-models-2306.00794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slothspeech-denial-of-service-attack-against-speech-recognition-models-2306.00794"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-contextual-biasing-for-transducer-based-streaming-speech-recognition-2306.00804</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-contextual-biasing-for-transducer-based-streaming-speech-recognition-2306.00804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-contextual-biasing-for-transducer-based-streaming-speech-recognition-2306.00804"/></url>
<url><loc>https://scifaro.com/en/abs/vocos-closing-the-gap-between-time-domain-and-fourier-based-neural-vocoders-for-high-quality-audio-synthesis-2306.00814</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocos-closing-the-gap-between-time-domain-and-fourier-based-neural-vocoders-for-high-quality-audio-synthesis-2306.00814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocos-closing-the-gap-between-time-domain-and-fourier-based-neural-vocoders-for-high-quality-audio-synthesis-2306.00814"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-a-convnext-model-to-audio-classification-on-audioset-2306.00830</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-a-convnext-model-to-audio-classification-on-audioset-2306.00830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-a-convnext-model-to-audio-classification-on-audioset-2306.00830"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-allpass-filters-for-phase-response-estimation-and-automatic-signal-alignment-2306.00860</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-allpass-filters-for-phase-response-estimation-and-automatic-signal-alignment-2306.00860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-allpass-filters-for-phase-response-estimation-and-automatic-signal-alignment-2306.00860"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-on-hubert-with-multiple-resolutions-2306.01084</loc><lastmod>2023-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-on-hubert-with-multiple-resolutions-2306.01084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-on-hubert-with-multiple-resolutions-2306.01084"/></url>
<url><loc>https://scifaro.com/en/abs/jepoo-highly-accurate-joint-estimation-of-pitch-onset-and-offset-for-music-information-retrieval-2306.01304</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jepoo-highly-accurate-joint-estimation-of-pitch-onset-and-offset-for-music-information-retrieval-2306.01304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jepoo-highly-accurate-joint-estimation-of-pitch-onset-and-offset-for-music-information-retrieval-2306.01304"/></url>
<url><loc>https://scifaro.com/en/abs/improved-deepfake-detection-using-whisper-features-2306.01428</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-deepfake-detection-using-whisper-features-2306.01428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-deepfake-detection-using-whisper-features-2306.01428"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-fastspeech-2-by-modelling-residual-multimodality-2306.01442</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-fastspeech-2-by-modelling-residual-multimodality-2306.01442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-fastspeech-2-by-modelling-residual-multimodality-2306.01442"/></url>
<url><loc>https://scifaro.com/en/abs/learning-local-to-global-feature-aggregation-for-speech-emotion-recognition-2306.01491</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-local-to-global-feature-aggregation-for-speech-emotion-recognition-2306.01491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-local-to-global-feature-aggregation-for-speech-emotion-recognition-2306.01491"/></url>
<url><loc>https://scifaro.com/en/abs/enhance-temporal-relations-in-audio-captioning-with-sound-event-detection-2306.01533</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhance-temporal-relations-in-audio-captioning-with-sound-event-detection-2306.01533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhance-temporal-relations-in-audio-captioning-with-sound-event-detection-2306.01533"/></url>
<url><loc>https://scifaro.com/en/abs/q-a-query-based-representation-learning-for-multi-track-symbolic-music-re-arrangement-2306.01635</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/q-a-query-based-representation-learning-for-multi-track-symbolic-music-re-arrangement-2306.01635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/q-a-query-based-representation-learning-for-multi-track-symbolic-music-re-arrangement-2306.01635"/></url>
<url><loc>https://scifaro.com/en/abs/edit-distance-based-rl-for-rnnt-decoding-2306.01789</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edit-distance-based-rl-for-rnnt-decoding-2306.01789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edit-distance-based-rl-for-rnnt-decoding-2306.01789"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-multi-task-representation-learning-for-mispronunciation-detection-2306.01845</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-multi-task-representation-learning-for-mispronunciation-detection-2306.01845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-multi-task-representation-learning-for-mispronunciation-detection-2306.01845"/></url>
<url><loc>https://scifaro.com/en/abs/bedrf-bidirectional-edge-diffraction-response-function-for-interactive-sound-propagation-2306.01974</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bedrf-bidirectional-edge-diffraction-response-function-for-interactive-sound-propagation-2306.01974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bedrf-bidirectional-edge-diffraction-response-function-for-interactive-sound-propagation-2306.01974"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-tonal-coarticulation-and-prosodic-positions-on-tonal-contours-of-low-rising-tones-in-the-case-of-xiamen-dialect-2306.02251</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-tonal-coarticulation-and-prosodic-positions-on-tonal-contours-of-low-rising-tones-in-the-case-of-xiamen-dialect-2306.02251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-tonal-coarticulation-and-prosodic-positions-on-tonal-contours-of-low-rising-tones-in-the-case-of-xiamen-dialect-2306.02251"/></url>
<url><loc>https://scifaro.com/en/abs/mavd-the-first-open-large-scale-mandarin-audio-visual-dataset-with-depth-information-2306.02263</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mavd-the-first-open-large-scale-mandarin-audio-visual-dataset-with-depth-information-2306.02263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mavd-the-first-open-large-scale-mandarin-audio-visual-dataset-with-depth-information-2306.02263"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-lyrics-to-melody-generation-2306.02613</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-lyrics-to-melody-generation-2306.02613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-lyrics-to-melody-generation-2306.02613"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-the-visual-cues-in-audio-visual-speaker-extraction-2306.02625</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-the-visual-cues-in-audio-visual-speaker-extraction-2306.02625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-the-visual-cues-in-audio-visual-speaker-extraction-2306.02625"/></url>
<url><loc>https://scifaro.com/en/abs/the-learning-prescription-a-neural-network-hearing-aid-core-2306.02750</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-learning-prescription-a-neural-network-hearing-aid-core-2306.02750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-learning-prescription-a-neural-network-hearing-aid-core-2306.02750"/></url>
<url><loc>https://scifaro.com/en/abs/deepvqe-real-time-deep-voice-quality-enhancement-for-joint-acoustic-echo-cancellation-noise-suppression-and-dereverberation-2306.03177</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepvqe-real-time-deep-voice-quality-enhancement-for-joint-acoustic-echo-cancellation-noise-suppression-and-dereverberation-2306.03177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepvqe-real-time-deep-voice-quality-enhancement-for-joint-acoustic-echo-cancellation-noise-suppression-and-dereverberation-2306.03177"/></url>
<url><loc>https://scifaro.com/en/abs/reef-elegy-an-auditory-display-of-hawaii-s-2019-coral-bleaching-data-2306.03307</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reef-elegy-an-auditory-display-of-hawaii-s-2019-coral-bleaching-data-2306.03307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reef-elegy-an-auditory-display-of-hawaii-s-2019-coral-bleaching-data-2306.03307"/></url>
<url><loc>https://scifaro.com/en/abs/phase-perturbation-improves-channel-robustness-for-speech-spoofing-countermeasures-2306.03389</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-perturbation-improves-channel-robustness-for-speech-spoofing-countermeasures-2306.03389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-perturbation-improves-channel-robustness-for-speech-spoofing-countermeasures-2306.03389"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-conditioned-melody-harmonization-with-hierarchical-variational-autoencoder-2306.03718</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-conditioned-melody-harmonization-with-hierarchical-variational-autoencoder-2306.03718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-conditioned-melody-harmonization-with-hierarchical-variational-autoencoder-2306.03718"/></url>
<url><loc>https://scifaro.com/en/abs/risc-a-corpus-for-shout-type-classification-and-shout-intensity-prediction-2306.04143</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/risc-a-corpus-for-shout-type-classification-and-shout-intensity-prediction-2306.04143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/risc-a-corpus-for-shout-type-classification-and-shout-intensity-prediction-2306.04143"/></url>
<url><loc>https://scifaro.com/en/abs/sangeet-a-xml-based-open-dataset-for-research-in-hindustani-sangeet-2306.04148</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sangeet-a-xml-based-open-dataset-for-research-in-hindustani-sangeet-2306.04148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sangeet-a-xml-based-open-dataset-for-research-in-hindustani-sangeet-2306.04148"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-automatic-speech-segmentation-in-meetings-based-on-circular-harmonics-features-2306.04268</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-automatic-speech-segmentation-in-meetings-based-on-circular-harmonics-features-2306.04268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-automatic-speech-segmentation-in-meetings-based-on-circular-harmonics-features-2306.04268"/></url>
<url><loc>https://scifaro.com/en/abs/a-mask-free-neural-network-for-monaural-speech-enhancement-2306.04286</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mask-free-neural-network-for-monaural-speech-enhancement-2306.04286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mask-free-neural-network-for-monaural-speech-enhancement-2306.04286"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-style-transfer-for-text-to-speech-with-controlvae-and-diffusion-bridge-2306.04301</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-style-transfer-for-text-to-speech-with-controlvae-and-diffusion-bridge-2306.04301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-style-transfer-for-text-to-speech-with-controlvae-and-diffusion-bridge-2306.04301"/></url>
<url><loc>https://scifaro.com/en/abs/arabic-dysarthric-speech-recognition-using-adversarial-and-signal-based-augmentation-2306.04368</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arabic-dysarthric-speech-recognition-using-adversarial-and-signal-based-augmentation-2306.04368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arabic-dysarthric-speech-recognition-using-adversarial-and-signal-based-augmentation-2306.04368"/></url>
<url><loc>https://scifaro.com/en/abs/systematic-analysis-of-music-representations-from-bert-2306.04628</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/systematic-analysis-of-music-representations-from-bert-2306.04628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/systematic-analysis-of-music-representations-from-bert-2306.04628"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-fake-audio-detection-with-low-rank-model-squeezing-2306.04956</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-fake-audio-detection-with-low-rank-model-squeezing-2306.04956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-fake-audio-detection-with-low-rank-model-squeezing-2306.04956"/></url>
<url><loc>https://scifaro.com/en/abs/language-specific-acoustic-boundary-learning-for-mandarin-english-code-switching-speech-recognition-2306.05279</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-specific-acoustic-boundary-learning-for-mandarin-english-code-switching-speech-recognition-2306.05279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-specific-acoustic-boundary-learning-for-mandarin-english-code-switching-speech-recognition-2306.05279"/></url>
<url><loc>https://scifaro.com/en/abs/simple-and-controllable-music-generation-2306.05284</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simple-and-controllable-music-generation-2306.05284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simple-and-controllable-music-generation-2306.05284"/></url>
<url><loc>https://scifaro.com/en/abs/peft-ser-on-the-use-of-parameter-efficient-transfer-learning-approaches-for-speech-emotion-recognition-using-pre-trained-speech-models-2306.05350</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peft-ser-on-the-use-of-parameter-efficient-transfer-learning-approaches-for-speech-emotion-recognition-using-pre-trained-speech-models-2306.05350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peft-ser-on-the-use-of-parameter-efficient-transfer-learning-approaches-for-speech-emotion-recognition-using-pre-trained-speech-models-2306.05350"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-adaptation-method-for-wav2vec2-based-fake-audio-detection-2306.05617</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-adaptation-method-for-wav2vec2-based-fake-audio-detection-2306.05617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-adaptation-method-for-wav2vec2-based-fake-audio-detection-2306.05617"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-fast-and-high-quality-speech-synthesis-with-linear-diffusion-2306.05708</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-fast-and-high-quality-speech-synthesis-with-linear-diffusion-2306.05708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-fast-and-high-quality-speech-synthesis-with-linear-diffusion-2306.05708"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-human-expressiveness-in-piano-performances-with-a-transformer-network-2306.06040</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-human-expressiveness-in-piano-performances-with-a-transformer-network-2306.06040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-human-expressiveness-in-piano-performances-with-a-transformer-network-2306.06040"/></url>
<url><loc>https://scifaro.com/en/abs/improving-fairness-and-robustness-in-end-to-end-speech-recognition-through-unsupervised-clustering-2306.06083</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-fairness-and-robustness-in-end-to-end-speech-recognition-through-unsupervised-clustering-2306.06083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-fairness-and-robustness-in-end-to-end-speech-recognition-through-unsupervised-clustering-2306.06083"/></url>
<url><loc>https://scifaro.com/en/abs/everybody-compose-deep-beats-to-music-2306.06284</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/everybody-compose-deep-beats-to-music-2306.06284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/everybody-compose-deep-beats-to-music-2306.06284"/></url>
<url><loc>https://scifaro.com/en/abs/vocoder-free-non-parallel-conversion-of-whispered-speech-with-masked-cycle-consistent-generative-adversarial-networks-2306.06514</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocoder-free-non-parallel-conversion-of-whispered-speech-with-masked-cycle-consistent-generative-adversarial-networks-2306.06514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocoder-free-non-parallel-conversion-of-whispered-speech-with-masked-cycle-consistent-generative-adversarial-networks-2306.06514"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-audio-compression-with-improved-rvqgan-2306.06546</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-audio-compression-with-improved-rvqgan-2306.06546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-audio-compression-with-improved-rvqgan-2306.06546"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-mandarin-electrolaryngeal-speech-voice-conversion-2306.06652</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-mandarin-electrolaryngeal-speech-voice-conversion-2306.06652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-mandarin-electrolaryngeal-speech-voice-conversion-2306.06652"/></url>
<url><loc>https://scifaro.com/en/abs/mandarin-electrolaryngeal-speech-voice-conversion-using-cross-domain-features-2306.06653</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mandarin-electrolaryngeal-speech-voice-conversion-using-cross-domain-features-2306.06653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mandarin-electrolaryngeal-speech-voice-conversion-using-cross-domain-features-2306.06653"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-the-uncertainty-in-emotion-attributes-using-deep-evidential-regression-2306.06760</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-the-uncertainty-in-emotion-attributes-using-deep-evidential-regression-2306.06760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-the-uncertainty-in-emotion-attributes-using-deep-evidential-regression-2306.06760"/></url>
<url><loc>https://scifaro.com/en/abs/underwater-acoustic-target-recognition-based-on-smoothness-inducing-regularization-and-spectrogram-based-data-augmentation-2306.06945</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underwater-acoustic-target-recognition-based-on-smoothness-inducing-regularization-and-spectrogram-based-data-augmentation-2306.06945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underwater-acoustic-target-recognition-based-on-smoothness-inducing-regularization-and-spectrogram-based-data-augmentation-2306.06945"/></url>
<url><loc>https://scifaro.com/en/abs/unicats-a-unified-context-aware-text-to-speech-framework-with-contextual-vq-diffusion-and-vocoding-2306.07547</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unicats-a-unified-context-aware-text-to-speech-framework-with-contextual-vq-diffusion-and-vocoding-2306.07547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unicats-a-unified-context-aware-text-to-speech-framework-with-contextual-vq-diffusion-and-vocoding-2306.07547"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-based-audio-to-lyrics-alignment-for-multiple-languages-2306.07744</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-based-audio-to-lyrics-alignment-for-multiple-languages-2306.07744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-based-audio-to-lyrics-alignment-for-multiple-languages-2306.07744"/></url>
<url><loc>https://scifaro.com/en/abs/unlocking-foundation-models-for-privacy-enhancing-speech-understanding-an-early-study-on-low-resource-speech-training-leveraging-label-guided-synthetic-speech-content-2306.07791</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unlocking-foundation-models-for-privacy-enhancing-speech-understanding-an-early-study-on-low-resource-speech-training-leveraging-label-guided-synthetic-speech-content-2306.07791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unlocking-foundation-models-for-privacy-enhancing-speech-understanding-an-early-study-on-low-resource-speech-training-leveraging-label-guided-synthetic-speech-content-2306.07791"/></url>
<url><loc>https://scifaro.com/en/abs/domain-information-control-at-inference-time-for-acoustic-scene-classification-2306.08010</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-information-control-at-inference-time-for-acoustic-scene-classification-2306.08010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-information-control-at-inference-time-for-acoustic-scene-classification-2306.08010"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-scheme-to-classify-read-and-spontaneous-speech-2306.08012</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-scheme-to-classify-read-and-spontaneous-speech-2306.08012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-scheme-to-classify-read-and-spontaneous-speech-2306.08012"/></url>
<url><loc>https://scifaro.com/en/abs/research-on-an-improved-conformer-end-to-end-speech-recognition-model-with-r-drop-structure-2306.08329</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-on-an-improved-conformer-end-to-end-speech-recognition-model-with-r-drop-structure-2306.08329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-on-an-improved-conformer-end-to-end-speech-recognition-model-with-r-drop-structure-2306.08329"/></url>
<url><loc>https://scifaro.com/en/abs/gesper-a-restoration-enhancement-framework-for-general-speech-reconstruction-2306.08454</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gesper-a-restoration-enhancement-framework-for-general-speech-reconstruction-2306.08454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gesper-a-restoration-enhancement-framework-for-general-speech-reconstruction-2306.08454"/></url>
<url><loc>https://scifaro.com/en/abs/combining-piano-performance-dimensions-for-score-difficulty-classification-2306.08480</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-piano-performance-dimensions-for-score-difficulty-classification-2306.08480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-piano-performance-dimensions-for-score-difficulty-classification-2306.08480"/></url>
<url><loc>https://scifaro.com/en/abs/anticipatory-music-transformer-2306.08620</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anticipatory-music-transformer-2306.08620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anticipatory-music-transformer-2306.08620"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-intelligibility-assessment-with-utterance-level-alignment-distance-between-teacher-and-learner-wav2vec-2-0-representations-2306.08845</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-intelligibility-assessment-with-utterance-level-alignment-distance-between-teacher-and-learner-wav2vec-2-0-representations-2306.08845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-intelligibility-assessment-with-utterance-level-alignment-distance-between-teacher-and-learner-wav2vec-2-0-representations-2306.08845"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-isolated-musical-notes-as-pre-training-data-for-predominant-instrument-recognition-in-polyphonic-music-2306.08850</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-isolated-musical-notes-as-pre-training-data-for-predominant-instrument-recognition-in-polyphonic-music-2306.08850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-isolated-musical-notes-as-pre-training-data-for-predominant-instrument-recognition-in-polyphonic-music-2306.08850"/></url>
<url><loc>https://scifaro.com/en/abs/multi-loss-convolutional-network-with-time-frequency-attention-for-speech-enhancement-2306.08956</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-loss-convolutional-network-with-time-frequency-attention-for-speech-enhancement-2306.08956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-loss-convolutional-network-with-time-frequency-attention-for-speech-enhancement-2306.08956"/></url>
<url><loc>https://scifaro.com/en/abs/team-acielee-technical-report-for-epic-sounds-audio-based-interaction-recognition-challenge-2023-2306.08998</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/team-acielee-technical-report-for-epic-sounds-audio-based-interaction-recognition-challenge-2023-2306.08998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/team-acielee-technical-report-for-epic-sounds-audio-based-interaction-recognition-challenge-2023-2306.08998"/></url>
<url><loc>https://scifaro.com/en/abs/coverhunter-cover-song-identification-with-refined-attention-and-alignments-2306.09025</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coverhunter-cover-song-identification-with-refined-attention-and-alignments-2306.09025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coverhunter-cover-song-identification-with-refined-attention-and-alignments-2306.09025"/></url>
<url><loc>https://scifaro.com/en/abs/environmental-sound-classification-on-an-embedded-hardware-platform-2306.09106</loc><lastmod>2025-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environmental-sound-classification-on-an-embedded-hardware-platform-2306.09106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environmental-sound-classification-on-an-embedded-hardware-platform-2306.09106"/></url>
<url><loc>https://scifaro.com/en/abs/starss23-an-audio-visual-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2306.09126</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/starss23-an-audio-visual-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2306.09126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/starss23-an-audio-visual-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2306.09126"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2023-challenge-2306.09223</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2023-challenge-2306.09223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-at-the-dcase-2023-challenge-2306.09223"/></url>
<url><loc>https://scifaro.com/en/abs/sound-demixing-challenge-2023-music-demixing-track-technical-report-tfc-tdf-unet-v3-2306.09382</loc><lastmod>2023-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-demixing-challenge-2023-music-demixing-track-technical-report-tfc-tdf-unet-v3-2306.09382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-demixing-challenge-2023-music-demixing-track-technical-report-tfc-tdf-unet-v3-2306.09382"/></url>
<url><loc>https://scifaro.com/en/abs/competitive-and-resource-efficient-factored-hybrid-hmm-systems-are-simpler-than-you-think-2306.09517</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/competitive-and-resource-efficient-factored-hybrid-hmm-systems-are-simpler-than-you-think-2306.09517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/competitive-and-resource-efficient-factored-hybrid-hmm-systems-are-simpler-than-you-think-2306.09517"/></url>
<url><loc>https://scifaro.com/en/abs/clipsonic-text-to-audio-synthesis-with-unlabeled-videos-and-pretrained-language-vision-models-2306.09635</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clipsonic-text-to-audio-synthesis-with-unlabeled-videos-and-pretrained-language-vision-models-2306.09635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clipsonic-text-to-audio-synthesis-with-unlabeled-videos-and-pretrained-language-vision-models-2306.09635"/></url>
<url><loc>https://scifaro.com/en/abs/correlation-clustering-of-bird-sounds-2306.09906</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correlation-clustering-of-bird-sounds-2306.09906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correlation-clustering-of-bird-sounds-2306.09906"/></url>
<url><loc>https://scifaro.com/en/abs/realimpact-a-dataset-of-impact-sound-fields-for-real-objects-2306.09944</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realimpact-a-dataset-of-impact-sound-fields-for-real-objects-2306.09944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realimpact-a-dataset-of-impact-sound-fields-for-real-objects-2306.09944"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-speech-representations-for-mos-prediction-2306.09979</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-speech-representations-for-mos-prediction-2306.09979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-speech-representations-for-mos-prediction-2306.09979"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-identification-of-ae-aegypti-mosquitoes-using-smartphone-apps-and-residual-convolutional-neural-networks-2306.10091</loc><lastmod>2024-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-identification-of-ae-aegypti-mosquitoes-using-smartphone-apps-and-residual-convolutional-neural-networks-2306.10091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-identification-of-ae-aegypti-mosquitoes-using-smartphone-apps-and-residual-convolutional-neural-networks-2306.10091"/></url>
<url><loc>https://scifaro.com/en/abs/musico-acoustic-depictions-of-laminar-and-turbulent-flows-in-ligeti-piano-etude-no-9-and-a-novel-method-of-analysis-2306.10093</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musico-acoustic-depictions-of-laminar-and-turbulent-flows-in-ligeti-piano-etude-no-9-and-a-novel-method-of-analysis-2306.10093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musico-acoustic-depictions-of-laminar-and-turbulent-flows-in-ligeti-piano-etude-no-9-and-a-novel-method-of-analysis-2306.10093"/></url>
<url><loc>https://scifaro.com/en/abs/neural-fast-full-rank-spatial-covariance-analysis-for-blind-source-separation-2306.10240</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-fast-full-rank-spatial-covariance-analysis-for-blind-source-separation-2306.10240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-fast-full-rank-spatial-covariance-analysis-for-blind-source-separation-2306.10240"/></url>
<url><loc>https://scifaro.com/en/abs/text-driven-foley-sound-generation-with-latent-diffusion-model-2306.10359</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-driven-foley-sound-generation-with-latent-diffusion-model-2306.10359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-driven-foley-sound-generation-with-latent-diffusion-model-2306.10359"/></url>
<url><loc>https://scifaro.com/en/abs/mospc-mos-prediction-based-on-pairwise-comparison-2306.10493</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mospc-mos-prediction-based-on-pairwise-comparison-2306.10493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mospc-mos-prediction-based-on-pairwise-comparison-2306.10493"/></url>
<url><loc>https://scifaro.com/en/abs/marble-music-audio-representation-benchmark-for-universal-evaluation-2306.10548</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/marble-music-audio-representation-benchmark-for-universal-evaluation-2306.10548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/marble-music-audio-representation-benchmark-for-universal-evaluation-2306.10548"/></url>
<url><loc>https://scifaro.com/en/abs/visually-guided-sound-source-separation-with-audio-visual-predictive-coding-2306.10684</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-guided-sound-source-separation-with-audio-visual-predictive-coding-2306.10684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-guided-sound-source-separation-with-audio-visual-predictive-coding-2306.10684"/></url>
<url><loc>https://scifaro.com/en/abs/algorithms-of-sampling-frequency-independent-layers-for-non-integer-strides-2306.10718</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/algorithms-of-sampling-frequency-independent-layers-for-non-integer-strides-2306.10718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/algorithms-of-sampling-frequency-independent-layers-for-non-integer-strides-2306.10718"/></url>
<url><loc>https://scifaro.com/en/abs/learning-an-interpretable-end-to-end-network-for-real-time-acoustic-beamforming-2306.10772</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-an-interpretable-end-to-end-network-for-real-time-acoustic-beamforming-2306.10772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-an-interpretable-end-to-end-network-for-real-time-acoustic-beamforming-2306.10772"/></url>
<url><loc>https://scifaro.com/en/abs/multitrack-music-transcription-with-a-time-frequency-perceiver-2306.10785</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitrack-music-transcription-with-a-time-frequency-perceiver-2306.10785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitrack-music-transcription-with-a-time-frequency-perceiver-2306.10785"/></url>
<url><loc>https://scifaro.com/en/abs/female-mosquito-detection-by-means-of-ai-techniques-inside-release-containers-in-the-context-of-a-sterile-insect-technique-program-2306.10843</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/female-mosquito-detection-by-means-of-ai-techniques-inside-release-containers-in-the-context-of-a-sterile-insect-technique-program-2306.10843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/female-mosquito-detection-by-means-of-ai-techniques-inside-release-containers-in-the-context-of-a-sterile-insect-technique-program-2306.10843"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-timbre-effects-with-differentiable-digital-signal-processing-2306.10886</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-timbre-effects-with-differentiable-digital-signal-processing-2306.10886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-timbre-effects-with-differentiable-digital-signal-processing-2306.10886"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-channel-attention-for-computationally-efficient-sound-event-detection-2306.11277</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-channel-attention-for-computationally-efficient-sound-event-detection-2306.11277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-channel-attention-for-computationally-efficient-sound-event-detection-2306.11277"/></url>
<url><loc>https://scifaro.com/en/abs/phase-repair-for-time-domain-convolutional-neural-networks-in-music-super-resolution-2306.11282</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-repair-for-time-domain-convolutional-neural-networks-in-music-super-resolution-2306.11282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-repair-for-time-domain-convolutional-neural-networks-in-music-super-resolution-2306.11282"/></url>
<url><loc>https://scifaro.com/en/abs/multi-pass-training-and-cross-information-fusion-for-low-resource-end-to-end-accented-speech-recognition-2306.11309</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-pass-training-and-cross-information-fusion-for-low-resource-end-to-end-accented-speech-recognition-2306.11309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-pass-training-and-cross-information-fusion-for-low-resource-end-to-end-accented-speech-recognition-2306.11309"/></url>
<url><loc>https://scifaro.com/en/abs/pipeline-for-recording-datasets-and-running-neural-networks-on-the-bela-embedded-hardware-platform-2306.11389</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pipeline-for-recording-datasets-and-running-neural-networks-on-the-bela-embedded-hardware-platform-2306.11389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pipeline-for-recording-datasets-and-running-neural-networks-on-the-bela-embedded-hardware-platform-2306.11389"/></url>
<url><loc>https://scifaro.com/en/abs/sound-reconstruction-from-human-brain-activity-via-a-generative-model-with-brain-like-auditory-features-2306.11629</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-reconstruction-from-human-brain-activity-via-a-generative-model-with-brain-like-auditory-features-2306.11629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-reconstruction-from-human-brain-activity-via-a-generative-model-with-brain-like-auditory-features-2306.11629"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-based-multimodal-music-similarity-2306.12249</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-based-multimodal-music-similarity-2306.12249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-based-multimodal-music-similarity-2306.12249"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speech-disentanglement-for-voice-conversion-using-rank-module-and-speech-augmentation-2306.12259</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speech-disentanglement-for-voice-conversion-using-rank-module-and-speech-augmentation-2306.12259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speech-disentanglement-for-voice-conversion-using-rank-module-and-speech-augmentation-2306.12259"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-prototypical-approach-for-unsupervised-sound-classification-2306.12300</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-prototypical-approach-for-unsupervised-sound-classification-2306.12300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-prototypical-approach-for-unsupervised-sound-classification-2306.12300"/></url>
<url><loc>https://scifaro.com/en/abs/toward-leveraging-pre-trained-self-supervised-frontends-for-automatic-singing-voice-understanding-tasks-three-case-studies-2306.12714</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-leveraging-pre-trained-self-supervised-frontends-for-automatic-singing-voice-understanding-tasks-three-case-studies-2306.12714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-leveraging-pre-trained-self-supervised-frontends-for-automatic-singing-voice-understanding-tasks-three-case-studies-2306.12714"/></url>
<url><loc>https://scifaro.com/en/abs/mfccgan-a-novel-mfcc-based-speech-synthesizer-using-adversarial-learning-2306.12785</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfccgan-a-novel-mfcc-based-speech-synthesizer-using-adversarial-learning-2306.12785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfccgan-a-novel-mfcc-based-speech-synthesizer-using-adversarial-learning-2306.12785"/></url>
<url><loc>https://scifaro.com/en/abs/russian-assimilatory-palatalization-is-incomplete-neutralization-2306.12789</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/russian-assimilatory-palatalization-is-incomplete-neutralization-2306.12789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/russian-assimilatory-palatalization-is-incomplete-neutralization-2306.12789"/></url>
<url><loc>https://scifaro.com/en/abs/noisyilrma-diffuse-noise-aware-independent-low-rank-matrix-analysis-for-fast-blind-source-extraction-2306.12820</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisyilrma-diffuse-noise-aware-independent-low-rank-matrix-analysis-for-fast-blind-source-extraction-2306.12820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisyilrma-diffuse-noise-aware-independent-low-rank-matrix-analysis-for-fast-blind-source-extraction-2306.12820"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-siren-audio-compression-with-implicit-neural-representations-2306.12957</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-siren-audio-compression-with-implicit-neural-representations-2306.12957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-siren-audio-compression-with-implicit-neural-representations-2306.12957"/></url>
<url><loc>https://scifaro.com/en/abs/disco-10m-a-large-scale-music-dataset-2306.13512</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disco-10m-a-large-scale-music-dataset-2306.13512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disco-10m-a-large-scale-music-dataset-2306.13512"/></url>
<url><loc>https://scifaro.com/en/abs/modulation-graphs-in-popular-music-2306.13691</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulation-graphs-in-popular-music-2306.13691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulation-graphs-in-popular-music-2306.13691"/></url>
<url><loc>https://scifaro.com/en/abs/improving-end-to-end-neural-diarization-using-conversational-summary-representations-2306.13863</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-end-to-end-neural-diarization-using-conversational-summary-representations-2306.13863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-end-to-end-neural-diarization-using-conversational-summary-representations-2306.13863"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-personalized-speech-recognition-system-development-for-the-deaf-and-hard-of-hearing-2306.13953</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-personalized-speech-recognition-system-development-for-the-deaf-and-hard-of-hearing-2306.13953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-personalized-speech-recognition-system-development-for-the-deaf-and-hard-of-hearing-2306.13953"/></url>
<url><loc>https://scifaro.com/en/abs/dse-tts-dual-speaker-embedding-for-cross-lingual-text-to-speech-2306.14145</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dse-tts-dual-speaker-embedding-for-cross-lingual-text-to-speech-2306.14145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dse-tts-dual-speaker-embedding-for-cross-lingual-text-to-speech-2306.14145"/></url>
<url><loc>https://scifaro.com/en/abs/primadnn-a-characteristics-aware-dnn-customization-for-singing-technique-detection-2306.14191</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/primadnn-a-characteristics-aware-dnn-customization-for-singing-technique-detection-2306.14191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/primadnn-a-characteristics-aware-dnn-customization-for-singing-technique-detection-2306.14191"/></url>
<url><loc>https://scifaro.com/en/abs/aeroacoustic-source-localization-2306.14276</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aeroacoustic-source-localization-2306.14276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aeroacoustic-source-localization-2306.14276"/></url>
<url><loc>https://scifaro.com/en/abs/the-singing-voice-conversion-challenge-2023-2306.14422</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-singing-voice-conversion-challenge-2023-2306.14422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-singing-voice-conversion-challenge-2023-2306.14422"/></url>
<url><loc>https://scifaro.com/en/abs/mono-to-stereo-through-parametric-stereo-generation-2306.14647</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mono-to-stereo-through-parametric-stereo-generation-2306.14647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mono-to-stereo-through-parametric-stereo-generation-2306.14647"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-learning-architecture-with-spatio-temporal-focusing-for-detecting-respiratory-anomalies-2306.14929</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-learning-architecture-with-spatio-temporal-focusing-for-detecting-respiratory-anomalies-2306.14929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-learning-architecture-with-spatio-temporal-focusing-for-detecting-respiratory-anomalies-2306.14929"/></url>
<url><loc>https://scifaro.com/en/abs/transsionadd-a-multi-frame-reinforcement-based-sequence-tagging-model-for-audio-deepfake-detection-2306.15212</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transsionadd-a-multi-frame-reinforcement-based-sequence-tagging-model-for-audio-deepfake-detection-2306.15212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transsionadd-a-multi-frame-reinforcement-based-sequence-tagging-model-for-audio-deepfake-detection-2306.15212"/></url>
<url><loc>https://scifaro.com/en/abs/multi-perspective-information-fusion-res2net-with-randomspecmix-for-fake-speech-detection-2306.15389</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-perspective-information-fusion-res2net-with-randomspecmix-for-fake-speech-detection-2306.15389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-perspective-information-fusion-res2net-with-randomspecmix-for-fake-speech-detection-2306.15389"/></url>
<url><loc>https://scifaro.com/en/abs/rmvpe-a-robust-model-for-vocal-pitch-estimation-in-polyphonic-music-2306.15412</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rmvpe-a-robust-model-for-vocal-pitch-estimation-in-polyphonic-music-2306.15412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rmvpe-a-robust-model-for-vocal-pitch-estimation-in-polyphonic-music-2306.15412"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-unsupervised-audio-pre-training-for-video-to-speech-synthesis-2306.15464</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-unsupervised-audio-pre-training-for-video-to-speech-synthesis-2306.15464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-unsupervised-audio-pre-training-for-video-to-speech-synthesis-2306.15464"/></url>
<url><loc>https://scifaro.com/en/abs/fake-the-real-backdoor-attack-on-deep-speech-classification-via-voice-conversion-2306.15875</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fake-the-real-backdoor-attack-on-deep-speech-classification-via-voice-conversion-2306.15875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fake-the-real-backdoor-attack-on-deep-speech-classification-via-voice-conversion-2306.15875"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-neural-beamformer-with-spatial-information-for-target-speech-extraction-2306.15942</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-neural-beamformer-with-spatial-information-for-target-speech-extraction-2306.15942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-neural-beamformer-with-spatial-information-for-target-speech-extraction-2306.15942"/></url>
<url><loc>https://scifaro.com/en/abs/enrollment-stage-backdoor-attacks-on-speaker-recognition-systems-via-adversarial-ultrasound-2306.16022</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enrollment-stage-backdoor-attacks-on-speaker-recognition-systems-via-adversarial-ultrasound-2306.16022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enrollment-stage-backdoor-attacks-on-speaker-recognition-systems-via-adversarial-ultrasound-2306.16022"/></url>
<url><loc>https://scifaro.com/en/abs/improving-primate-sounds-classification-using-binary-presorting-for-deep-learning-2306.16054</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-primate-sounds-classification-using-binary-presorting-for-deep-learning-2306.16054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-primate-sounds-classification-using-binary-presorting-for-deep-learning-2306.16054"/></url>
<url><loc>https://scifaro.com/en/abs/graph-neural-networks-for-sound-source-localization-on-distributed-microphone-networks-2306.16081</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-neural-networks-for-sound-source-localization-on-distributed-microphone-networks-2306.16081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-neural-networks-for-sound-source-localization-on-distributed-microphone-networks-2306.16081"/></url>
<url><loc>https://scifaro.com/en/abs/unitspeech-speaker-adaptive-speech-synthesis-with-untranscribed-data-2306.16083</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unitspeech-speaker-adaptive-speech-synthesis-with-untranscribed-data-2306.16083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unitspeech-speaker-adaptive-speech-synthesis-with-untranscribed-data-2306.16083"/></url>
<url><loc>https://scifaro.com/en/abs/focus-on-the-sound-around-you-monaural-target-speaker-extraction-via-distance-and-speaker-information-2306.16241</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/focus-on-the-sound-around-you-monaural-target-speaker-extraction-via-distance-and-speaker-information-2306.16241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/focus-on-the-sound-around-you-monaural-target-speaker-extraction-via-distance-and-speaker-information-2306.16241"/></url>
<url><loc>https://scifaro.com/en/abs/mc-spex-towards-effective-speaker-extraction-with-multi-scale-interfusion-and-conditional-speaker-modulation-2306.16250</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mc-spex-towards-effective-speaker-extraction-with-multi-scale-interfusion-and-conditional-speaker-modulation-2306.16250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mc-spex-towards-effective-speaker-extraction-with-multi-scale-interfusion-and-conditional-speaker-modulation-2306.16250"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-encoders-for-fine-tuning-asr-models-on-overlapped-speech-2306.16398</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-encoders-for-fine-tuning-asr-models-on-overlapped-speech-2306.16398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-encoders-for-fine-tuning-asr-models-on-overlapped-speech-2306.16398"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-with-semi-supervised-dataset-annotation-for-birdcall-classification-2306.16760</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-with-semi-supervised-dataset-annotation-for-birdcall-classification-2306.16760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-with-semi-supervised-dataset-annotation-for-birdcall-classification-2306.16760"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-music-hierarchies-with-a-graph-based-neural-decoder-2306.16955</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-music-hierarchies-with-a-graph-based-neural-decoder-2306.16955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-music-hierarchies-with-a-graph-based-neural-decoder-2306.16955"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-age-and-gender-prediction-with-transformers-2306.16962</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-age-and-gender-prediction-with-transformers-2306.16962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-age-and-gender-prediction-with-transformers-2306.16962"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relevance-of-acoustic-measurements-for-creating-realistic-virtual-acoustic-environments-2306.16967</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relevance-of-acoustic-measurements-for-creating-realistic-virtual-acoustic-environments-2306.16967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relevance-of-acoustic-measurements-for-creating-realistic-virtual-acoustic-environments-2306.16967"/></url>
<url><loc>https://scifaro.com/en/abs/diff-foley-synchronized-video-to-audio-synthesis-with-latent-diffusion-models-2306.17203</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-foley-synchronized-video-to-audio-synthesis-with-latent-diffusion-models-2306.17203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-foley-synchronized-video-to-audio-synthesis-with-latent-diffusion-models-2306.17203"/></url>
<url><loc>https://scifaro.com/en/abs/audio-embeddings-as-teachers-for-music-classification-2306.17424</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-embeddings-as-teachers-for-music-classification-2306.17424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-embeddings-as-teachers-for-music-classification-2306.17424"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-voice-towards-continuous-3d-hand-pose-tracking-on-commercial-home-assistant-devices-2306.17477</loc><lastmod>2025-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-voice-towards-continuous-3d-hand-pose-tracking-on-commercial-home-assistant-devices-2306.17477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-voice-towards-continuous-3d-hand-pose-tracking-on-commercial-home-assistant-devices-2306.17477"/></url>
<url><loc>https://scifaro.com/en/abs/empirical-interpretation-of-the-relationship-between-speech-acoustic-context-and-emotion-recognition-2306.17500</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empirical-interpretation-of-the-relationship-between-speech-acoustic-context-and-emotion-recognition-2306.17500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empirical-interpretation-of-the-relationship-between-speech-acoustic-context-and-emotion-recognition-2306.17500"/></url>
<url><loc>https://scifaro.com/en/abs/casein-cascading-explicit-and-implicit-control-for-fine-grained-emotion-intensity-regulation-2307.00020</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/casein-cascading-explicit-and-implicit-control-for-fine-grained-emotion-intensity-regulation-2307.00020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/casein-cascading-explicit-and-implicit-control-for-fine-grained-emotion-intensity-regulation-2307.00020"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-multi-module-audio-deepfake-generation-system-for-add-challenge-2023-2307.00729</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-multi-module-audio-deepfake-generation-system-for-add-challenge-2023-2307.00729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-multi-module-audio-deepfake-generation-system-for-add-challenge-2023-2307.00729"/></url>
<url><loc>https://scifaro.com/en/abs/musif-a-python-package-for-symbolic-music-feature-extraction-2307.01120</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musif-a-python-package-for-symbolic-music-feature-extraction-2307.01120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musif-a-python-package-for-symbolic-music-feature-extraction-2307.01120"/></url>
<url><loc>https://scifaro.com/en/abs/emogen-eliminating-subjective-bias-in-emotional-music-generation-2307.01229</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emogen-eliminating-subjective-bias-in-emotional-music-generation-2307.01229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emogen-eliminating-subjective-bias-in-emotional-music-generation-2307.01229"/></url>
<url><loc>https://scifaro.com/en/abs/robustl2s-speaker-specific-lip-to-speech-synthesis-exploiting-self-supervised-representations-2307.01233</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustl2s-speaker-specific-lip-to-speech-synthesis-exploiting-self-supervised-representations-2307.01233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustl2s-speaker-specific-lip-to-speech-synthesis-exploiting-self-supervised-representations-2307.01233"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-temporal-graph-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2307.01386</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-temporal-graph-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2307.01386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-temporal-graph-based-multi-channel-speaker-verification-with-ad-hoc-microphone-arrays-2307.01386"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-conformer-with-asr-or-asv-for-anti-spoofing-countermeasure-2307.01546</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-conformer-with-asr-or-asv-for-anti-spoofing-countermeasure-2307.01546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-conformer-with-asr-or-asv-for-anti-spoofing-countermeasure-2307.01546"/></url>
<url><loc>https://scifaro.com/en/abs/going-retro-astonishingly-simple-yet-effective-rule-based-prosody-modelling-for-speech-synthesis-simulating-emotion-dimensions-2307.02132</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/going-retro-astonishingly-simple-yet-effective-rule-based-prosody-modelling-for-speech-synthesis-simulating-emotion-dimensions-2307.02132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/going-retro-astonishingly-simple-yet-effective-rule-based-prosody-modelling-for-speech-synthesis-simulating-emotion-dimensions-2307.02132"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-with-diffusion-based-multichannel-speech-enhancement-for-speaker-verification-under-noisy-conditions-2307.02244</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-with-diffusion-based-multichannel-speech-enhancement-for-speaker-verification-under-noisy-conditions-2307.02244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-with-diffusion-based-multichannel-speech-enhancement-for-speaker-verification-under-noisy-conditions-2307.02244"/></url>
<url><loc>https://scifaro.com/en/abs/dsarsr-deep-stacked-auto-encoders-enhanced-robust-speaker-recognition-2307.02751</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsarsr-deep-stacked-auto-encoders-enhanced-robust-speaker-recognition-2307.02751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsarsr-deep-stacked-auto-encoders-enhanced-robust-speaker-recognition-2307.02751"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-raw-waveforms-with-deep-learning-frameworks-for-speech-emotion-recognition-2307.02820</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-raw-waveforms-with-deep-learning-frameworks-for-speech-emotion-recognition-2307.02820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-raw-waveforms-with-deep-learning-frameworks-for-speech-emotion-recognition-2307.02820"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-at-noise-robust-automatic-speech-recognizers-are-also-strong-general-audio-event-taggers-2307.03183</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-at-noise-robust-automatic-speech-recognizers-are-also-strong-general-audio-event-taggers-2307.03183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-at-noise-robust-automatic-speech-recognizers-are-also-strong-general-audio-event-taggers-2307.03183"/></url>
<url><loc>https://scifaro.com/en/abs/the-chime-7-udase-task-unsupervised-domain-adaptation-for-conversational-speech-enhancement-2307.03533</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-chime-7-udase-task-unsupervised-domain-adaptation-for-conversational-speech-enhancement-2307.03533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-chime-7-udase-task-unsupervised-domain-adaptation-for-conversational-speech-enhancement-2307.03533"/></url>
<url><loc>https://scifaro.com/en/abs/roman-numeral-analysis-with-graph-neural-networks-onset-wise-predictions-from-note-wise-features-2307.03544</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/roman-numeral-analysis-with-graph-neural-networks-onset-wise-predictions-from-note-wise-features-2307.03544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/roman-numeral-analysis-with-graph-neural-networks-onset-wise-predictions-from-note-wise-features-2307.03544"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-guided-music-accompaniment-generation-based-on-variational-autoencoder-2307.04015</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-guided-music-accompaniment-generation-based-on-variational-autoencoder-2307.04015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-guided-music-accompaniment-generation-based-on-variational-autoencoder-2307.04015"/></url>
<url><loc>https://scifaro.com/en/abs/edge-storage-management-recipe-with-zero-shot-data-compression-for-road-anomaly-detection-2307.04298</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edge-storage-management-recipe-with-zero-shot-data-compression-for-road-anomaly-detection-2307.04298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edge-storage-management-recipe-with-zero-shot-data-compression-for-road-anomaly-detection-2307.04298"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-piano-transcription-with-hierarchical-frequency-time-transformer-2307.04305</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-piano-transcription-with-hierarchical-frequency-time-transformer-2307.04305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-piano-transcription-with-hierarchical-frequency-time-transformer-2307.04305"/></url>
<url><loc>https://scifaro.com/en/abs/hclas-x-hierarchical-and-cascaded-lyrics-alignment-system-using-multimodal-cross-correlation-2307.04377</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hclas-x-hierarchical-and-cascaded-lyrics-alignment-system-using-multimodal-cross-correlation-2307.04377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hclas-x-hierarchical-and-cascaded-lyrics-alignment-system-using-multimodal-cross-correlation-2307.04377"/></url>
<url><loc>https://scifaro.com/en/abs/echovest-real-time-sound-classification-and-depth-perception-expressed-through-transcutaneous-electrical-nerve-stimulation-2307.04604</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echovest-real-time-sound-classification-and-depth-perception-expressed-through-transcutaneous-electrical-nerve-stimulation-2307.04604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echovest-real-time-sound-classification-and-depth-perception-expressed-through-transcutaneous-electrical-nerve-stimulation-2307.04604"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-msxf-speech-to-speech-translation-system-for-iwslt-2023-speech-to-speech-translation-task-2307.04630</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-msxf-speech-to-speech-translation-system-for-iwslt-2023-speech-to-speech-translation-task-2307.04630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-msxf-speech-to-speech-translation-system-for-iwslt-2023-speech-to-speech-translation-task-2307.04630"/></url>
<url><loc>https://scifaro.com/en/abs/vampnet-music-generation-via-masked-acoustic-token-modeling-2307.04686</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vampnet-music-generation-via-masked-acoustic-token-modeling-2307.04686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vampnet-music-generation-via-masked-acoustic-token-modeling-2307.04686"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-tract-area-estimation-by-gradient-descent-2307.04702</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-tract-area-estimation-by-gradient-descent-2307.04702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-tract-area-estimation-by-gradient-descent-2307.04702"/></url>
<url><loc>https://scifaro.com/en/abs/launchpadgpt-language-model-as-music-visualization-designer-on-launchpad-2307.04827</loc><lastmod>2025-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/launchpadgpt-language-model-as-music-visualization-designer-on-launchpad-2307.04827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/launchpadgpt-language-model-as-music-visualization-designer-on-launchpad-2307.04827"/></url>
<url><loc>https://scifaro.com/en/abs/the-smarty4covid-dataset-and-knowledge-base-a-framework-enabling-interpretable-analysis-of-audio-signals-2307.05096</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-smarty4covid-dataset-and-knowledge-base-a-framework-enabling-interpretable-analysis-of-audio-signals-2307.05096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-smarty4covid-dataset-and-knowledge-base-a-framework-enabling-interpretable-analysis-of-audio-signals-2307.05096"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-feature-extraction-for-symbolic-music-2307.05107</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-feature-extraction-for-symbolic-music-2307.05107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-feature-extraction-for-symbolic-music-2307.05107"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effectiveness-of-speech-self-supervised-learning-for-music-2307.05161</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effectiveness-of-speech-self-supervised-learning-for-music-2307.05161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effectiveness-of-speech-self-supervised-learning-for-music-2307.05161"/></url>
<url><loc>https://scifaro.com/en/abs/shredgp-guitarist-style-conditioned-tablature-generation-2307.05324</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shredgp-guitarist-style-conditioned-tablature-generation-2307.05324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shredgp-guitarist-style-conditioned-tablature-generation-2307.05324"/></url>
<url><loc>https://scifaro.com/en/abs/proggp-from-guitarpro-tablature-neural-generation-to-progressive-metal-production-2307.05328</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proggp-from-guitarpro-tablature-neural-generation-to-progressive-metal-production-2307.05328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proggp-from-guitarpro-tablature-neural-generation-to-progressive-metal-production-2307.05328"/></url>
<url><loc>https://scifaro.com/en/abs/collaborative-song-dataset-cosod-an-annotated-dataset-of-multi-artist-collaborations-in-popular-music-2307.05588</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collaborative-song-dataset-cosod-an-annotated-dataset-of-multi-artist-collaborations-in-popular-music-2307.05588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collaborative-song-dataset-cosod-an-annotated-dataset-of-multi-artist-collaborations-in-popular-music-2307.05588"/></url>
<url><loc>https://scifaro.com/en/abs/language-routing-mixture-of-experts-for-multilingual-and-code-switching-speech-recognition-2307.05956</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-routing-mixture-of-experts-for-multilingual-and-code-switching-speech-recognition-2307.05956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-routing-mixture-of-experts-for-multilingual-and-code-switching-speech-recognition-2307.05956"/></url>
<url><loc>https://scifaro.com/en/abs/can-large-language-models-aid-in-annotating-speech-emotional-data-uncovering-new-frontiers-2307.06090</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-large-language-models-aid-in-annotating-speech-emotional-data-uncovering-new-frontiers-2307.06090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-large-language-models-aid-in-annotating-speech-emotional-data-uncovering-new-frontiers-2307.06090"/></url>
<url><loc>https://scifaro.com/en/abs/b-clean-sc-clean-sc-for-broadband-sources-2307.06181</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/b-clean-sc-clean-sc-for-broadband-sources-2307.06181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/b-clean-sc-clean-sc-for-broadband-sources-2307.06181"/></url>
<url><loc>https://scifaro.com/en/abs/uncovering-the-deceptions-an-analysis-on-audio-spoofing-detection-and-future-prospects-2307.06669</loc><lastmod>2023-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncovering-the-deceptions-an-analysis-on-audio-spoofing-detection-and-future-prospects-2307.06669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncovering-the-deceptions-an-analysis-on-audio-spoofing-detection-and-future-prospects-2307.06669"/></url>
<url><loc>https://scifaro.com/en/abs/anuraset-a-dataset-for-benchmarking-neotropical-anuran-calls-identification-in-passive-acoustic-monitoring-2307.06860</loc><lastmod>2023-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anuraset-a-dataset-for-benchmarking-neotropical-anuran-calls-identification-in-passive-acoustic-monitoring-2307.06860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anuraset-a-dataset-for-benchmarking-neotropical-anuran-calls-identification-in-passive-acoustic-monitoring-2307.06860"/></url>
<url><loc>https://scifaro.com/en/abs/audioinceptionnext-tcl-ai-lab-submission-to-epic-sound-audio-based-interaction-recognition-challenge-2023-2307.07265</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioinceptionnext-tcl-ai-lab-submission-to-epic-sound-audio-based-interaction-recognition-challenge-2023-2307.07265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioinceptionnext-tcl-ai-lab-submission-to-epic-sound-audio-based-interaction-recognition-challenge-2023-2307.07265"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-percussive-technique-recognition-and-embedding-learning-for-the-acoustic-guitar-2307.07426</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-percussive-technique-recognition-and-embedding-learning-for-the-acoustic-guitar-2307.07426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-percussive-technique-recognition-and-embedding-learning-for-the-acoustic-guitar-2307.07426"/></url>
<url><loc>https://scifaro.com/en/abs/single-and-multi-speaker-cloned-voice-detection-from-perceptual-to-learned-features-2307.07683</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-and-multi-speaker-cloned-voice-detection-from-perceptual-to-learned-features-2307.07683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-and-multi-speaker-cloned-voice-detection-from-perceptual-to-learned-features-2307.07683"/></url>
<url><loc>https://scifaro.com/en/abs/noisebandnet-controllable-time-varying-neural-synthesis-of-sound-effects-using-filterbanks-2307.08007</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisebandnet-controllable-time-varying-neural-synthesis-of-sound-effects-using-filterbanks-2307.08007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisebandnet-controllable-time-varying-neural-synthesis-of-sound-effects-using-filterbanks-2307.08007"/></url>
<url><loc>https://scifaro.com/en/abs/towards-stealthy-backdoor-attacks-against-speech-recognition-via-elements-of-sound-2307.08208</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-stealthy-backdoor-attacks-against-speech-recognition-via-elements-of-sound-2307.08208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-stealthy-backdoor-attacks-against-speech-recognition-via-elements-of-sound-2307.08208"/></url>
<url><loc>https://scifaro.com/en/abs/tst-time-sparse-transducer-for-automatic-speech-recognition-2307.08323</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tst-time-sparse-transducer-for-automatic-speech-recognition-2307.08323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tst-time-sparse-transducer-for-automatic-speech-recognition-2307.08323"/></url>
<url><loc>https://scifaro.com/en/abs/oxfordvgg-submission-to-the-ego4d-av-transcription-challenge-2307.09006</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/oxfordvgg-submission-to-the-ego4d-av-transcription-challenge-2307.09006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/oxfordvgg-submission-to-the-ego4d-av-transcription-challenge-2307.09006"/></url>
<url><loc>https://scifaro.com/en/abs/flexiast-flexibility-is-what-ast-needs-2307.09286</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexiast-flexibility-is-what-ast-needs-2307.09286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexiast-flexibility-is-what-ast-needs-2307.09286"/></url>
<url><loc>https://scifaro.com/en/abs/musical-excellence-of-mridangam-an-introductory-review-2307.09425</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-excellence-of-mridangam-an-introductory-review-2307.09425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-excellence-of-mridangam-an-introductory-review-2307.09425"/></url>
<url><loc>https://scifaro.com/en/abs/jazzvar-a-dataset-of-variations-found-within-solo-piano-performances-of-jazz-standards-for-music-overpainting-2307.09670</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jazzvar-a-dataset-of-variations-found-within-solo-piano-performances-of-jazz-standards-for-music-overpainting-2307.09670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jazzvar-a-dataset-of-variations-found-within-solo-piano-performances-of-jazz-standards-for-music-overpainting-2307.09670"/></url>
<url><loc>https://scifaro.com/en/abs/improving-domain-generalization-for-sound-classification-with-sparse-frequency-regularized-transformer-2307.09723</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-domain-generalization-for-sound-classification-with-sparse-frequency-regularized-transformer-2307.09723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-domain-generalization-for-sound-classification-with-sparse-frequency-regularized-transformer-2307.09723"/></url>
<url><loc>https://scifaro.com/en/abs/from-west-to-east-who-can-understand-the-music-of-the-others-better-2307.09795</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-west-to-east-who-can-understand-the-music-of-the-others-better-2307.09795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-west-to-east-who-can-understand-the-music-of-the-others-better-2307.09795"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-timbre-synthesis-using-variational-autoencoders-regularized-on-timbre-descriptors-2307.10283</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-timbre-synthesis-using-variational-autoencoders-regularized-on-timbre-descriptors-2307.10283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-timbre-synthesis-using-variational-autoencoders-regularized-on-timbre-descriptors-2307.10283"/></url>
<url><loc>https://scifaro.com/en/abs/polyffusion-a-diffusion-model-for-polyphonic-score-generation-with-internal-and-external-controls-2307.10304</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyffusion-a-diffusion-model-for-polyphonic-score-generation-with-internal-and-external-controls-2307.10304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyffusion-a-diffusion-model-for-polyphonic-score-generation-with-internal-and-external-controls-2307.10304"/></url>
<url><loc>https://scifaro.com/en/abs/sc-vall-e-style-controllable-zero-shot-text-to-speech-synthesizer-2307.10550</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sc-vall-e-style-controllable-zero-shot-text-to-speech-synthesizer-2307.10550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sc-vall-e-style-controllable-zero-shot-text-to-speech-synthesizer-2307.10550"/></url>
<url><loc>https://scifaro.com/en/abs/vesper-a-compact-and-effective-pretrained-model-for-speech-emotion-recognition-2307.10757</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vesper-a-compact-and-effective-pretrained-model-for-speech-emotion-recognition-2307.10757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vesper-a-compact-and-effective-pretrained-model-for-speech-emotion-recognition-2307.10757"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-with-resnet-and-bi-gru-using-visual-spectrograms-2307.10773</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-with-resnet-and-bi-gru-using-visual-spectrograms-2307.10773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-with-resnet-and-bi-gru-using-visual-spectrograms-2307.10773"/></url>
<url><loc>https://scifaro.com/en/abs/masr-multi-label-aware-speech-representation-2307.10982</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masr-multi-label-aware-speech-representation-2307.10982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masr-multi-label-aware-speech-representation-2307.10982"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-distillation-diffusion-for-raw-music-generation-2307.10994</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-distillation-diffusion-for-raw-music-generation-2307.10994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-distillation-diffusion-for-raw-music-generation-2307.10994"/></url>
<url><loc>https://scifaro.com/en/abs/a-change-of-heart-improving-speech-emotion-recognition-through-speech-to-text-modality-conversion-2307.11584</loc><lastmod>2023-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-change-of-heart-improving-speech-emotion-recognition-through-speech-to-text-modality-conversion-2307.11584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-change-of-heart-improving-speech-emotion-recognition-through-speech-to-text-modality-conversion-2307.11584"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-integration-of-speech-separation-and-recognition-with-self-supervised-learning-representation-2307.12231</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-integration-of-speech-separation-and-recognition-with-self-supervised-learning-representation-2307.12231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-integration-of-speech-separation-and-recognition-with-self-supervised-learning-representation-2307.12231"/></url>
<url><loc>https://scifaro.com/en/abs/signal-reconstruction-from-mel-spectrogram-based-on-bi-level-consistency-of-full-band-magnitude-and-phase-2307.12232</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-reconstruction-from-mel-spectrogram-based-on-bi-level-consistency-of-full-band-magnitude-and-phase-2307.12232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-reconstruction-from-mel-spectrogram-based-on-bi-level-consistency-of-full-band-magnitude-and-phase-2307.12232"/></url>
<url><loc>https://scifaro.com/en/abs/a-meta-learning-scheme-for-fast-accent-domain-expansion-in-mandarin-speech-recognition-2307.12262</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-meta-learning-scheme-for-fast-accent-domain-expansion-in-mandarin-speech-recognition-2307.12262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-meta-learning-scheme-for-fast-accent-domain-expansion-in-mandarin-speech-recognition-2307.12262"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-audio-based-emotion-recognition-2307.12343</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-audio-based-emotion-recognition-2307.12343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-audio-based-emotion-recognition-2307.12343"/></url>
<url><loc>https://scifaro.com/en/abs/scraps-speech-contrastive-representations-of-acoustic-and-phonetic-spaces-2307.12445</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scraps-speech-contrastive-representations-of-acoustic-and-phonetic-spaces-2307.12445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scraps-speech-contrastive-representations-of-acoustic-and-phonetic-spaces-2307.12445"/></url>
<url><loc>https://scifaro.com/en/abs/robust-automatic-speech-recognition-via-wavaugment-guided-phoneme-adversarial-training-2307.12498</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-automatic-speech-recognition-via-wavaugment-guided-phoneme-adversarial-training-2307.12498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-automatic-speech-recognition-via-wavaugment-guided-phoneme-adversarial-training-2307.12498"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-for-every-user-and-budget-label-free-and-personalized-mixed-precision-quantization-2307.12659</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-for-every-user-and-budget-label-free-and-personalized-mixed-precision-quantization-2307.12659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-for-every-user-and-budget-label-free-and-personalized-mixed-precision-quantization-2307.12659"/></url>
<url><loc>https://scifaro.com/en/abs/online-continual-learning-in-keyword-spotting-for-low-resource-devices-via-pooling-high-order-temporal-statistics-2307.12660</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-continual-learning-in-keyword-spotting-for-low-resource-devices-via-pooling-high-order-temporal-statistics-2307.12660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-continual-learning-in-keyword-spotting-for-low-resource-devices-via-pooling-high-order-temporal-statistics-2307.12660"/></url>
<url><loc>https://scifaro.com/en/abs/an-objective-evaluation-of-hearing-aids-and-dnn-based-speech-enhancement-in-complex-acoustic-scenes-2307.12888</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-objective-evaluation-of-hearing-aids-and-dnn-based-speech-enhancement-in-complex-acoustic-scenes-2307.12888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-objective-evaluation-of-hearing-aids-and-dnn-based-speech-enhancement-in-complex-acoustic-scenes-2307.12888"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speech-and-overlap-detection-a-benchmark-over-multiple-audio-setup-and-speech-domains-2307.13012</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speech-and-overlap-detection-a-benchmark-over-multiple-audio-setup-and-speech-domains-2307.13012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speech-and-overlap-detection-a-benchmark-over-multiple-audio-setup-and-speech-domains-2307.13012"/></url>
<url><loc>https://scifaro.com/en/abs/audio-aware-query-enhanced-transformer-for-audio-visual-segmentation-2307.13236</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-aware-query-enhanced-transformer-for-audio-visual-segmentation-2307.13236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-aware-query-enhanced-transformer-for-audio-visual-segmentation-2307.13236"/></url>
<url><loc>https://scifaro.com/en/abs/cqnv-a-combination-of-coarsely-quantized-bitstream-and-neural-vocoder-for-low-rate-speech-coding-2307.13295</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cqnv-a-combination-of-coarsely-quantized-bitstream-and-neural-vocoder-for-low-rate-speech-coding-2307.13295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cqnv-a-combination-of-coarsely-quantized-bitstream-and-neural-vocoder-for-low-rate-speech-coding-2307.13295"/></url>
<url><loc>https://scifaro.com/en/abs/a-snoring-sound-dataset-for-body-position-recognition-collection-annotation-and-analysis-2307.13346</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-snoring-sound-dataset-for-body-position-recognition-collection-annotation-and-analysis-2307.13346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-snoring-sound-dataset-for-body-position-recognition-collection-annotation-and-analysis-2307.13346"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-intelligibility-predictor-for-hearing-impaired-individuals-using-self-supervised-speech-representations-2307.13423</loc><lastmod>2023-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-intelligibility-predictor-for-hearing-impaired-individuals-using-self-supervised-speech-representations-2307.13423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-intelligibility-predictor-for-hearing-impaired-individuals-using-self-supervised-speech-representations-2307.13423"/></url>
<url><loc>https://scifaro.com/en/abs/histogram-layer-time-delay-neural-networks-for-passive-sonar-classification-2307.13788</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/histogram-layer-time-delay-neural-networks-for-passive-sonar-classification-2307.13788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/histogram-layer-time-delay-neural-networks-for-passive-sonar-classification-2307.13788"/></url>
<url><loc>https://scifaro.com/en/abs/fitting-auditory-filterbanks-with-multiresolution-neural-networks-2307.13821</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fitting-auditory-filterbanks-with-multiresolution-neural-networks-2307.13821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fitting-auditory-filterbanks-with-multiresolution-neural-networks-2307.13821"/></url>
<url><loc>https://scifaro.com/en/abs/bovinetalk-machine-learning-for-vocalization-analysis-of-dairy-cattle-under-negative-affective-states-2307.13994</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bovinetalk-machine-learning-for-vocalization-analysis-of-dairy-cattle-under-negative-affective-states-2307.13994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bovinetalk-machine-learning-for-vocalization-analysis-of-dairy-cattle-under-negative-affective-states-2307.13994"/></url>
<url><loc>https://scifaro.com/en/abs/cif-t-a-novel-cif-based-transducer-architecture-for-automatic-speech-recognition-2307.14132</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cif-t-a-novel-cif-based-transducer-architecture-for-automatic-speech-recognition-2307.14132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cif-t-a-novel-cif-based-transducer-architecture-for-automatic-speech-recognition-2307.14132"/></url>
<url><loc>https://scifaro.com/en/abs/wavjourney-compositional-audio-creation-with-large-language-models-2307.14335</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavjourney-compositional-audio-creation-with-large-language-models-2307.14335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavjourney-compositional-audio-creation-with-large-language-models-2307.14335"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-using-u-net-spiking-neural-networks-2307.14464</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-u-net-spiking-neural-networks-2307.14464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-u-net-spiking-neural-networks-2307.14464"/></url>
<url><loc>https://scifaro.com/en/abs/complete-and-separate-conditional-separation-with-missing-target-source-attribute-completion-2307.14609</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complete-and-separate-conditional-separation-with-missing-target-source-attribute-completion-2307.14609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complete-and-separate-conditional-separation-with-missing-target-source-attribute-completion-2307.14609"/></url>
<url><loc>https://scifaro.com/en/abs/graph-based-polyphonic-multitrack-music-generation-2307.14928</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-based-polyphonic-multitrack-music-generation-2307.14928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-based-polyphonic-multitrack-music-generation-2307.14928"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-text-retrieval-via-hierarchical-cross-modal-interaction-and-auxiliary-captions-2307.15344</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-text-retrieval-via-hierarchical-cross-modal-interaction-and-auxiliary-captions-2307.15344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-text-retrieval-via-hierarchical-cross-modal-interaction-and-auxiliary-captions-2307.15344"/></url>
<url><loc>https://scifaro.com/en/abs/the-flyspeech-audio-visual-speaker-diarization-system-for-misp-challenge-2022-2307.15400</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-flyspeech-audio-visual-speaker-diarization-system-for-misp-challenge-2022-2307.15400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-flyspeech-audio-visual-speaker-diarization-system-for-misp-challenge-2022-2307.15400"/></url>
<url><loc>https://scifaro.com/en/abs/minimally-supervised-speech-synthesis-with-conditional-diffusion-model-and-language-model-a-comparative-study-of-semantic-coding-2307.15484</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimally-supervised-speech-synthesis-with-conditional-diffusion-model-and-language-model-a-comparative-study-of-semantic-coding-2307.15484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimally-supervised-speech-synthesis-with-conditional-diffusion-model-and-language-model-a-comparative-study-of-semantic-coding-2307.15484"/></url>
<url><loc>https://scifaro.com/en/abs/automated-approach-for-source-location-in-shallow-waters-2307.15491</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-approach-for-source-location-in-shallow-waters-2307.15491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-approach-for-source-location-in-shallow-waters-2307.15491"/></url>
<url><loc>https://scifaro.com/en/abs/all-for-one-and-one-for-all-deep-learning-based-feature-fusion-for-synthetic-speech-detection-2307.15555</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-for-one-and-one-for-all-deep-learning-based-feature-fusion-for-synthetic-speech-detection-2307.15555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-for-one-and-one-for-all-deep-learning-based-feature-fusion-for-synthetic-speech-detection-2307.15555"/></url>
<url><loc>https://scifaro.com/en/abs/unibrivl-robust-universal-representation-and-generation-of-audio-driven-diffusion-models-2307.15898</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unibrivl-robust-universal-representation-and-generation-of-audio-driven-diffusion-models-2307.15898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unibrivl-robust-universal-representation-and-generation-of-audio-driven-diffusion-models-2307.15898"/></url>
<url><loc>https://scifaro.com/en/abs/moisesdb-a-dataset-for-source-separation-beyond-4-stems-2307.15913</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moisesdb-a-dataset-for-source-separation-beyond-4-stems-2307.15913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moisesdb-a-dataset-for-source-separation-beyond-4-stems-2307.15913"/></url>
<url><loc>https://scifaro.com/en/abs/msstyletts-multi-scale-style-modeling-with-hierarchical-context-information-for-expressive-speech-synthesis-2307.16012</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msstyletts-multi-scale-style-modeling-with-hierarchical-context-information-for-expressive-speech-synthesis-2307.16012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msstyletts-multi-scale-style-modeling-with-hierarchical-context-information-for-expressive-speech-synthesis-2307.16012"/></url>
<url><loc>https://scifaro.com/en/abs/hiervst-hierarchical-adaptive-zero-shot-voice-style-transfer-2307.16171</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hiervst-hierarchical-adaptive-zero-shot-voice-style-transfer-2307.16171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hiervst-hierarchical-adaptive-zero-shot-voice-style-transfer-2307.16171"/></url>
<url><loc>https://scifaro.com/en/abs/lp-musiccaps-llm-based-pseudo-music-captioning-2307.16372</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lp-musiccaps-llm-based-pseudo-music-captioning-2307.16372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lp-musiccaps-llm-based-pseudo-music-captioning-2307.16372"/></url>
<url><loc>https://scifaro.com/en/abs/vits2-improving-quality-and-efficiency-of-single-stage-text-to-speech-with-adversarial-learning-and-architecture-design-2307.16430</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vits2-improving-quality-and-efficiency-of-single-stage-text-to-speech-with-adversarial-learning-and-architecture-design-2307.16430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vits2-improving-quality-and-efficiency-of-single-stage-text-to-speech-with-adversarial-learning-and-architecture-design-2307.16430"/></url>
<url><loc>https://scifaro.com/en/abs/spatialnet-extensively-learning-spatial-information-for-multichannel-joint-speech-separation-denoising-and-dereverberation-2307.16516</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatialnet-extensively-learning-spatial-information-for-multichannel-joint-speech-separation-denoising-and-dereverberation-2307.16516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatialnet-extensively-learning-spatial-information-for-multichannel-joint-speech-separation-denoising-and-dereverberation-2307.16516"/></url>
<url><loc>https://scifaro.com/en/abs/diffprosody-diffusion-based-latent-prosody-generation-for-expressive-speech-synthesis-with-prosody-conditional-adversarial-training-2307.16549</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffprosody-diffusion-based-latent-prosody-generation-for-expressive-speech-synthesis-with-prosody-conditional-adversarial-training-2307.16549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffprosody-diffusion-based-latent-prosody-generation-for-expressive-speech-synthesis-with-prosody-conditional-adversarial-training-2307.16549"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-video-to-speech-synthesis-with-synthesized-input-audio-2307.16584</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-video-to-speech-synthesis-with-synthesized-input-audio-2307.16584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-video-to-speech-synthesis-with-synthesized-input-audio-2307.16584"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-segmentation-by-exploring-cross-modal-mutual-semantics-2307.16620</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-segmentation-by-exploring-cross-modal-mutual-semantics-2307.16620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-segmentation-by-exploring-cross-modal-mutual-semantics-2307.16620"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-multi-speaker-speech-separation-using-efficient-transformer-model-2308.00010</loc><lastmod>2026-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-multi-speaker-speech-separation-using-efficient-transformer-model-2308.00010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-multi-speaker-speech-separation-using-efficient-transformer-model-2308.00010"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-how-a-generative-ai-interprets-music-2308.00015</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-how-a-generative-ai-interprets-music-2308.00015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-how-a-generative-ai-interprets-music-2308.00015"/></url>
<url><loc>https://scifaro.com/en/abs/music-de-limiter-networks-via-sample-wise-gain-inversion-2308.01187</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-de-limiter-networks-via-sample-wise-gain-inversion-2308.01187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-de-limiter-networks-via-sample-wise-gain-inversion-2308.01187"/></url>
<url><loc>https://scifaro.com/en/abs/careful-whisper-leveraging-advances-in-automatic-speech-recognition-for-robust-and-interpretable-aphasia-subtype-classification-2308.01327</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/careful-whisper-leveraging-advances-in-automatic-speech-recognition-for-robust-and-interpretable-aphasia-subtype-classification-2308.01327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/careful-whisper-leveraging-advances-in-automatic-speech-recognition-for-robust-and-interpretable-aphasia-subtype-classification-2308.01327"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-multi-user-indoor-sound-communications-with-acoustic-reconfigurable-metasurfaces-2308.01531</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-multi-user-indoor-sound-communications-with-acoustic-reconfigurable-metasurfaces-2308.01531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-multi-user-indoor-sound-communications-with-acoustic-reconfigurable-metasurfaces-2308.01531"/></url>
<url><loc>https://scifaro.com/en/abs/musicldm-enhancing-novelty-in-text-to-music-generation-using-beat-synchronous-mixup-strategies-2308.01546</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicldm-enhancing-novelty-in-text-to-music-generation-using-beat-synchronous-mixup-strategies-2308.01546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicldm-enhancing-novelty-in-text-to-music-generation-using-beat-synchronous-mixup-strategies-2308.01546"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-training-of-denoising-diffusion-model-using-dual-discriminators-for-high-fidelity-multi-speaker-tts-2308.01573</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-training-of-denoising-diffusion-model-using-dual-discriminators-for-high-fidelity-multi-speaker-tts-2308.01573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-training-of-denoising-diffusion-model-using-dual-discriminators-for-high-fidelity-multi-speaker-tts-2308.01573"/></url>
<url><loc>https://scifaro.com/en/abs/federated-representation-learning-for-automatic-speech-recognition-2308.02013</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-representation-learning-for-automatic-speech-recognition-2308.02013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-representation-learning-for-automatic-speech-recognition-2308.02013"/></url>
<url><loc>https://scifaro.com/en/abs/emo-dna-emotion-decoupling-and-alignment-learning-for-cross-corpus-speech-emotion-recognition-2308.02190</loc><lastmod>2023-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-dna-emotion-decoupling-and-alignment-learning-for-cross-corpus-speech-emotion-recognition-2308.02190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-dna-emotion-decoupling-and-alignment-learning-for-cross-corpus-speech-emotion-recognition-2308.02190"/></url>
<url><loc>https://scifaro.com/en/abs/finding-tori-self-supervised-learning-for-analyzing-korean-folk-song-2308.02249</loc><lastmod>2023-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/finding-tori-self-supervised-learning-for-analyzing-korean-folk-song-2308.02249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/finding-tori-self-supervised-learning-for-analyzing-korean-folk-song-2308.02249"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-monaural-speech-enhancement-using-spectrum-attention-fusion-2308.02263</loc><lastmod>2023-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-monaural-speech-enhancement-using-spectrum-attention-fusion-2308.02263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-monaural-speech-enhancement-using-spectrum-attention-fusion-2308.02263"/></url>
<url><loc>https://scifaro.com/en/abs/from-discrete-tokens-to-high-fidelity-audio-using-multi-band-diffusion-2308.02560</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-discrete-tokens-to-high-fidelity-audio-using-multi-band-diffusion-2308.02560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-discrete-tokens-to-high-fidelity-audio-using-multi-band-diffusion-2308.02560"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improving-harmonic-sensitivity-and-prediction-stability-for-singing-melody-extraction-2308.02723</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improving-harmonic-sensitivity-and-prediction-stability-for-singing-melody-extraction-2308.02723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improving-harmonic-sensitivity-and-prediction-stability-for-singing-melody-extraction-2308.02723"/></url>
<url><loc>https://scifaro.com/en/abs/a-systematic-exploration-of-joint-training-for-singing-voice-synthesis-2308.02867</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-systematic-exploration-of-joint-training-for-singing-voice-synthesis-2308.02867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-systematic-exploration-of-joint-training-for-singing-voice-synthesis-2308.02867"/></url>
<url><loc>https://scifaro.com/en/abs/elucidate-gender-fairness-in-singing-voice-transcription-2308.02898</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/elucidate-gender-fairness-in-singing-voice-transcription-2308.02898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/elucidate-gender-fairness-in-singing-voice-transcription-2308.02898"/></url>
<url><loc>https://scifaro.com/en/abs/characterization-of-cough-sounds-using-statistical-analysis-2308.03019</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterization-of-cough-sounds-using-statistical-analysis-2308.03019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterization-of-cough-sounds-using-statistical-analysis-2308.03019"/></url>
<url><loc>https://scifaro.com/en/abs/seaco-paraformer-a-non-autoregressive-asr-system-with-flexible-and-effective-hotword-customization-ability-2308.03266</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seaco-paraformer-a-non-autoregressive-asr-system-with-flexible-and-effective-hotword-customization-ability-2308.03266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seaco-paraformer-a-non-autoregressive-asr-system-with-flexible-and-effective-hotword-customization-ability-2308.03266"/></url>
<url><loc>https://scifaro.com/en/abs/do-you-remember-overcoming-catastrophic-forgetting-for-fake-audio-detection-2308.03300</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-you-remember-overcoming-catastrophic-forgetting-for-fake-audio-detection-2308.03300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-you-remember-overcoming-catastrophic-forgetting-for-fake-audio-detection-2308.03300"/></url>
<url><loc>https://scifaro.com/en/abs/improving-deep-attractor-network-by-bgru-and-gmm-for-speech-separation-2308.03332</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-deep-attractor-network-by-bgru-and-gmm-for-speech-separation-2308.03332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-deep-attractor-network-by-bgru-and-gmm-for-speech-separation-2308.03332"/></url>
<url><loc>https://scifaro.com/en/abs/msac-multiple-speech-attribute-control-method-for-reliable-speech-emotion-recognition-2308.04025</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msac-multiple-speech-attribute-control-method-for-reliable-speech-emotion-recognition-2308.04025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msac-multiple-speech-attribute-control-method-for-reliable-speech-emotion-recognition-2308.04025"/></url>
<url><loc>https://scifaro.com/en/abs/dual-input-neural-networks-for-positional-sound-source-localization-2308.04169</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-input-neural-networks-for-positional-sound-source-localization-2308.04169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-input-neural-networks-for-positional-sound-source-localization-2308.04169"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-attention-decoding-with-task-related-multi-view-contrastive-learning-2308.04244</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-attention-decoding-with-task-related-multi-view-contrastive-learning-2308.04244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-attention-decoding-with-task-related-multi-view-contrastive-learning-2308.04244"/></url>
<url><loc>https://scifaro.com/en/abs/capturing-spectral-and-long-term-contextual-information-for-speech-emotion-recognition-using-deep-learning-techniques-2308.04517</loc><lastmod>2023-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capturing-spectral-and-long-term-contextual-information-for-speech-emotion-recognition-using-deep-learning-techniques-2308.04517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capturing-spectral-and-long-term-contextual-information-for-speech-emotion-recognition-using-deep-learning-techniques-2308.04517"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-using-isomorphic-graph-attention-network-based-pooling-on-self-supervised-representation-2308.04666</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-using-isomorphic-graph-attention-network-based-pooling-on-self-supervised-representation-2308.04666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-using-isomorphic-graph-attention-network-based-pooling-on-self-supervised-representation-2308.04666"/></url>
<url><loc>https://scifaro.com/en/abs/jen-1-text-guided-universal-music-generation-with-omnidirectional-diffusion-models-2308.04729</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jen-1-text-guided-universal-music-generation-with-omnidirectional-diffusion-models-2308.04729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jen-1-text-guided-universal-music-generation-with-omnidirectional-diffusion-models-2308.04729"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-for-audio-privacy-preservation-using-source-separation-and-robust-adversarial-learning-2308.04960</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-for-audio-privacy-preservation-using-source-separation-and-robust-adversarial-learning-2308.04960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-for-audio-privacy-preservation-using-source-separation-and-robust-adversarial-learning-2308.04960"/></url>
<url><loc>https://scifaro.com/en/abs/sound-propagation-in-realistic-interactive-3d-scenes-with-parameterized-sources-using-deep-neural-operators-2308.05141</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-propagation-in-realistic-interactive-3d-scenes-with-parameterized-sources-using-deep-neural-operators-2308.05141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-propagation-in-realistic-interactive-3d-scenes-with-parameterized-sources-using-deep-neural-operators-2308.05141"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-based-target-speaker-automatic-speech-recognition-for-single-channel-audio-2308.05218</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-based-target-speaker-automatic-speech-recognition-for-single-channel-audio-2308.05218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-based-target-speaker-automatic-speech-recognition-for-single-channel-audio-2308.05218"/></url>
<url><loc>https://scifaro.com/en/abs/audioldm-2-learning-holistic-audio-generation-with-self-supervised-pretraining-2308.05734</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioldm-2-learning-holistic-audio-generation-with-self-supervised-pretraining-2308.05734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioldm-2-learning-holistic-audio-generation-with-self-supervised-pretraining-2308.05734"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-learning-on-overlapped-speech-detection-new-benchmark-and-new-general-system-2308.05987</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-learning-on-overlapped-speech-detection-new-benchmark-and-new-general-system-2308.05987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-learning-on-overlapped-speech-detection-new-benchmark-and-new-general-system-2308.05987"/></url>
<url><loc>https://scifaro.com/en/abs/audio-is-all-in-one-speech-driven-gesture-synthetics-using-wavlm-pre-trained-model-2308.05995</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-is-all-in-one-speech-driven-gesture-synthetics-using-wavlm-pre-trained-model-2308.05995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-is-all-in-one-speech-driven-gesture-synthetics-using-wavlm-pre-trained-model-2308.05995"/></url>
<url><loc>https://scifaro.com/en/abs/an-autoethnographic-exploration-of-xai-in-algorithmic-composition-2308.06089</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-autoethnographic-exploration-of-xai-in-algorithmic-composition-2308.06089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-autoethnographic-exploration-of-xai-in-algorithmic-composition-2308.06089"/></url>
<url><loc>https://scifaro.com/en/abs/lip2vec-efficient-and-robust-visual-speech-recognition-via-latent-to-latent-visual-to-audio-representation-mapping-2308.06112</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lip2vec-efficient-and-robust-visual-speech-recognition-via-latent-to-latent-visual-to-audio-representation-mapping-2308.06112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lip2vec-efficient-and-robust-visual-speech-recognition-via-latent-to-latent-visual-to-audio-representation-mapping-2308.06112"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-hallucinator-one-shot-voice-conversion-via-set-expansion-2308.06382</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-hallucinator-one-shot-voice-conversion-via-set-expansion-2308.06382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-hallucinator-one-shot-voice-conversion-via-set-expansion-2308.06382"/></url>
<url><loc>https://scifaro.com/en/abs/flexible-keyword-spotting-based-on-homogeneous-audio-text-embedding-2308.06472</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexible-keyword-spotting-based-on-homogeneous-audio-text-embedding-2308.06472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexible-keyword-spotting-based-on-homogeneous-audio-text-embedding-2308.06472"/></url>
<url><loc>https://scifaro.com/en/abs/bigwavgan-a-wave-to-wave-generative-adversarial-network-for-music-super-resolution-2308.06483</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigwavgan-a-wave-to-wave-generative-adversarial-network-for-music-super-resolution-2308.06483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigwavgan-a-wave-to-wave-generative-adversarial-network-for-music-super-resolution-2308.06483"/></url>
<url><loc>https://scifaro.com/en/abs/istftnet2-faster-and-more-lightweight-istft-based-neural-vocoder-using-1d-2d-cnn-2308.07117</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/istftnet2-faster-and-more-lightweight-istft-based-neural-vocoder-using-1d-2d-cnn-2308.07117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/istftnet2-faster-and-more-lightweight-istft-based-neural-vocoder-using-1d-2d-cnn-2308.07117"/></url>
<url><loc>https://scifaro.com/en/abs/active-bird2vec-towards-end-to-end-bird-sound-monitoring-with-transformers-2308.07121</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-bird2vec-towards-end-to-end-bird-sound-monitoring-with-transformers-2308.07121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-bird2vec-towards-end-to-end-bird-sound-monitoring-with-transformers-2308.07121"/></url>
<url><loc>https://scifaro.com/en/abs/human-voice-pitch-estimation-a-convolutional-network-with-auto-labeled-and-synthetic-data-2308.07170</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-voice-pitch-estimation-a-convolutional-network-with-auto-labeled-and-synthetic-data-2308.07170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-voice-pitch-estimation-a-convolutional-network-with-auto-labeled-and-synthetic-data-2308.07170"/></url>
<url><loc>https://scifaro.com/en/abs/audioformer-audio-transformer-learns-audio-feature-representations-from-discrete-acoustic-codes-2308.07221</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioformer-audio-transformer-learns-audio-feature-representations-from-discrete-acoustic-codes-2308.07221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioformer-audio-transformer-learns-audio-feature-representations-from-discrete-acoustic-codes-2308.07221"/></url>
<url><loc>https://scifaro.com/en/abs/diffsed-sound-event-detection-with-denoising-diffusion-2308.07293</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsed-sound-event-detection-with-denoising-diffusion-2308.07293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsed-sound-event-detection-with-denoising-diffusion-2308.07293"/></url>
<url><loc>https://scifaro.com/en/abs/diffv2s-diffusion-based-video-to-speech-synthesis-with-vision-guided-speaker-embedding-2308.07787</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffv2s-diffusion-based-video-to-speech-synthesis-with-vision-guided-speaker-embedding-2308.07787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffv2s-diffusion-based-video-to-speech-synthesis-with-vision-guided-speaker-embedding-2308.07787"/></url>
<url><loc>https://scifaro.com/en/abs/radio2text-streaming-speech-recognition-using-mmwave-radio-signals-2308.08125</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radio2text-streaming-speech-recognition-using-mmwave-radio-signals-2308.08125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radio2text-streaming-speech-recognition-using-mmwave-radio-signals-2308.08125"/></url>
<url><loc>https://scifaro.com/en/abs/iianet-an-intra-and-inter-modality-attention-network-for-audio-visual-speech-separation-2308.08143</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iianet-an-intra-and-inter-modality-attention-network-for-audio-visual-speech-separation-2308.08143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iianet-an-intra-and-inter-modality-attention-network-for-audio-visual-speech-separation-2308.08143"/></url>
<url><loc>https://scifaro.com/en/abs/chinatelecom-system-description-to-voxceleb-speaker-recognition-challenge-2023-2308.08181</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chinatelecom-system-description-to-voxceleb-speaker-recognition-challenge-2023-2308.08181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chinatelecom-system-description-to-voxceleb-speaker-recognition-challenge-2023-2308.08181"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-synthesis-of-dysarthric-speech-for-asr-data-augmentation-2308.08438</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-synthesis-of-dysarthric-speech-for-asr-data-augmentation-2308.08438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-synthesis-of-dysarthric-speech-for-asr-data-augmentation-2308.08438"/></url>
<url><loc>https://scifaro.com/en/abs/affectecho-speaker-independent-and-language-agnostic-emotion-and-affect-transfer-for-speech-synthesis-2308.08577</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/affectecho-speaker-independent-and-language-agnostic-emotion-and-affect-transfer-for-speech-synthesis-2308.08577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/affectecho-speaker-independent-and-language-agnostic-emotion-and-affect-transfer-for-speech-synthesis-2308.08577"/></url>
<url><loc>https://scifaro.com/en/abs/long-frame-shift-neural-speech-phase-prediction-with-spectral-continuity-enhancement-and-interpolation-error-compensation-2308.08850</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-frame-shift-neural-speech-phase-prediction-with-spectral-continuity-enhancement-and-interpolation-error-compensation-2308.08850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-frame-shift-neural-speech-phase-prediction-with-spectral-continuity-enhancement-and-interpolation-error-compensation-2308.08850"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-high-quality-audio-and-video-via-language-for-sound-effects-retrieval-from-visual-queries-2308.09089</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-high-quality-audio-and-video-via-language-for-sound-effects-retrieval-from-visual-queries-2308.09089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-high-quality-audio-and-video-via-language-for-sound-effects-retrieval-from-visual-queries-2308.09089"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-anti-spoofing-with-fusion-reconstruction-learning-on-multi-order-spectrograms-2308.09302</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-anti-spoofing-with-fusion-reconstruction-learning-on-multi-order-spectrograms-2308.09302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-anti-spoofing-with-fusion-reconstruction-learning-on-multi-order-spectrograms-2308.09302"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-sampling-techniques-for-generating-melodies-with-a-transformer-language-model-2308.09454</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-sampling-techniques-for-generating-melodies-with-a-transformer-language-model-2308.09454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-sampling-techniques-for-generating-melodies-with-a-transformer-language-model-2308.09454"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-librispeech-an-augmented-dataset-for-spatial-audio-learning-2308.09514</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-librispeech-an-augmented-dataset-for-spatial-audio-learning-2308.09514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-librispeech-an-augmented-dataset-for-spatial-audio-learning-2308.09514"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-reconstructed-local-attention-res2net-with-f0-subband-for-fake-speech-detection-2308.09944</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-reconstructed-local-attention-res2net-with-f0-subband-for-fake-speech-detection-2308.09944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-reconstructed-local-attention-res2net-with-f0-subband-for-fake-speech-detection-2308.09944"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architectures-learning-fourier-transforms-signal-processing-and-much-more-2308.10388</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architectures-learning-fourier-transforms-signal-processing-and-much-more-2308.10388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architectures-learning-fourier-transforms-signal-processing-and-much-more-2308.10388"/></url>
<url><loc>https://scifaro.com/en/abs/tokensplit-using-discrete-speech-representations-for-direct-refined-and-transcript-conditioned-speech-separation-and-recognition-2308.10415</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tokensplit-using-discrete-speech-representations-for-direct-refined-and-transcript-conditioned-speech-separation-and-recognition-2308.10415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tokensplit-using-discrete-speech-representations-for-direct-refined-and-transcript-conditioned-speech-separation-and-recognition-2308.10415"/></url>
<url><loc>https://scifaro.com/en/abs/an-anchor-point-based-image-model-for-room-impulse-response-simulation-with-directional-source-radiation-and-sensor-directivity-patterns-2308.10543</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-anchor-point-based-image-model-for-room-impulse-response-simulation-with-directional-source-radiation-and-sensor-directivity-patterns-2308.10543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-anchor-point-based-image-model-for-room-impulse-response-simulation-with-directional-source-radiation-and-sensor-directivity-patterns-2308.10543"/></url>
<url><loc>https://scifaro.com/en/abs/libriwasn-a-data-set-for-meeting-separation-diarization-and-recognition-with-asynchronous-recording-devices-2308.10682</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libriwasn-a-data-set-for-meeting-separation-diarization-and-recognition-with-asynchronous-recording-devices-2308.10682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libriwasn-a-data-set-for-meeting-separation-diarization-and-recognition-with-asynchronous-recording-devices-2308.10682"/></url>
<url><loc>https://scifaro.com/en/abs/pmvc-data-augmentation-based-prosody-modeling-for-expressive-voice-conversion-2308.11084</loc><lastmod>2023-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pmvc-data-augmentation-based-prosody-modeling-for-expressive-voice-conversion-2308.11084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pmvc-data-augmentation-based-prosody-modeling-for-expressive-voice-conversion-2308.11084"/></url>
<url><loc>https://scifaro.com/en/abs/an-effective-transformer-based-contextual-model-and-temporal-gate-pooling-for-speaker-identification-2308.11241</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-effective-transformer-based-contextual-model-and-temporal-gate-pooling-for-speaker-identification-2308.11241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-effective-transformer-based-contextual-model-and-temporal-gate-pooling-for-speaker-identification-2308.11241"/></url>
<url><loc>https://scifaro.com/en/abs/music-understanding-llama-advancing-text-to-music-generation-with-question-answering-and-captioning-2308.11276</loc><lastmod>2023-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-understanding-llama-advancing-text-to-music-generation-with-question-answering-and-captioning-2308.11276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-understanding-llama-advancing-text-to-music-generation-with-question-answering-and-captioning-2308.11276"/></url>
<url><loc>https://scifaro.com/en/abs/convoifilter-a-case-study-of-doing-cocktail-party-speech-recognition-2308.11380</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convoifilter-a-case-study-of-doing-cocktail-party-speech-recognition-2308.11380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convoifilter-a-case-study-of-doing-cocktail-party-speech-recognition-2308.11380"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-denoising-streamed-from-mobile-phones-improves-speech-in-noise-understanding-for-hearing-aid-users-2308.11456</loc><lastmod>2023-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-denoising-streamed-from-mobile-phones-improves-speech-in-noise-understanding-for-hearing-aid-users-2308.11456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-denoising-streamed-from-mobile-phones-improves-speech-in-noise-understanding-for-hearing-aid-users-2308.11456"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-language-model-capabilities-for-sound-event-detection-2308.11530</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-language-model-capabilities-for-sound-event-detection-2308.11530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-language-model-capabilities-for-sound-event-detection-2308.11530"/></url>
<url><loc>https://scifaro.com/en/abs/complex-valued-neural-networks-for-voice-anti-spoofing-2308.11800</loc><lastmod>2023-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-valued-neural-networks-for-voice-anti-spoofing-2308.11800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-valued-neural-networks-for-voice-anti-spoofing-2308.11800"/></url>
<url><loc>https://scifaro.com/en/abs/audio-generation-with-multiple-conditional-diffusion-model-2308.11940</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-generation-with-multiple-conditional-diffusion-model-2308.11940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-generation-with-multiple-conditional-diffusion-model-2308.11940"/></url>
<url><loc>https://scifaro.com/en/abs/ced-consistent-ensemble-distillation-for-audio-tagging-2308.11957</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ced-consistent-ensemble-distillation-for-audio-tagging-2308.11957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ced-consistent-ensemble-distillation-for-audio-tagging-2308.11957"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-bends-in-popular-music-guitar-tablatures-2308.12307</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-bends-in-popular-music-guitar-tablatures-2308.12307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-bends-in-popular-music-guitar-tablatures-2308.12307"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-exploration-learning-to-generate-realistic-audio-for-silent-video-2308.12408</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-exploration-learning-to-generate-realistic-audio-for-silent-video-2308.12408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-exploration-learning-to-generate-realistic-audio-for-silent-video-2308.12408"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-acoustic-feature-fusion-network-for-depression-detection-2308.12478</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-acoustic-feature-fusion-network-for-depression-detection-2308.12478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-acoustic-feature-fusion-network-for-depression-detection-2308.12478"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-time-frequency-conformers-for-music-audio-enhancement-2308.12599</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-time-frequency-conformers-for-music-audio-enhancement-2308.12599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-time-frequency-conformers-for-music-audio-enhancement-2308.12599"/></url>
<url><loc>https://scifaro.com/en/abs/naaloss-rethinking-the-objective-of-speech-enhancement-2308.12615</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naaloss-rethinking-the-objective-of-speech-enhancement-2308.12615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naaloss-rethinking-the-objective-of-speech-enhancement-2308.12615"/></url>
<url><loc>https://scifaro.com/en/abs/whombat-an-open-source-annotation-tool-for-machine-learning-development-in-bioacoustics-2308.12688</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whombat-an-open-source-annotation-tool-for-machine-learning-development-in-bioacoustics-2308.12688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whombat-an-open-source-annotation-tool-for-machine-learning-development-in-bioacoustics-2308.12688"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-detection-of-ai-generated-speech-for-deepfake-voice-conversion-2308.12734</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-detection-of-ai-generated-speech-for-deepfake-voice-conversion-2308.12734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-detection-of-ai-generated-speech-for-deepfake-voice-conversion-2308.12734"/></url>
<url><loc>https://scifaro.com/en/abs/wavmark-watermarking-for-audio-generation-2308.12770</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavmark-watermarking-for-audio-generation-2308.12770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavmark-watermarking-for-audio-generation-2308.12770"/></url>
<url><loc>https://scifaro.com/en/abs/sparks-of-large-audio-models-a-survey-and-outlook-2308.12792</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparks-of-large-audio-models-a-survey-and-outlook-2308.12792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparks-of-large-audio-models-a-survey-and-outlook-2308.12792"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automated-animal-density-estimation-with-acoustic-spatial-capture-recapture-2308.12859</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automated-animal-density-estimation-with-acoustic-spatial-capture-recapture-2308.12859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automated-animal-density-estimation-with-acoustic-spatial-capture-recapture-2308.12859"/></url>
<url><loc>https://scifaro.com/en/abs/lcanets-robust-audio-classification-using-multi-layer-neural-networks-with-lateral-competition-2308.12882</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lcanets-robust-audio-classification-using-multi-layer-neural-networks-with-lateral-competition-2308.12882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lcanets-robust-audio-classification-using-multi-layer-neural-networks-with-lateral-competition-2308.12882"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-ai-music-generation-tools-and-models-2308.12982</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-ai-music-generation-tools-and-models-2308.12982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-ai-music-generation-tools-and-models-2308.12982"/></url>
<url><loc>https://scifaro.com/en/abs/generalizable-zero-shot-speaker-adaptive-speech-synthesis-with-disentangled-representations-2308.13007</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalizable-zero-shot-speaker-adaptive-speech-synthesis-with-disentangled-representations-2308.13007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalizable-zero-shot-speaker-adaptive-speech-synthesis-with-disentangled-representations-2308.13007"/></url>
<url><loc>https://scifaro.com/en/abs/deep-active-audio-feature-learning-in-resource-constrained-environments-2308.13201</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-active-audio-feature-learning-in-resource-constrained-environments-2308.13201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-active-audio-feature-learning-in-resource-constrained-environments-2308.13201"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-paragraph-text-to-speech-synthesis-with-multi-step-variational-autoencoder-2308.13365</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-paragraph-text-to-speech-synthesis-with-multi-step-variational-autoencoder-2308.13365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-paragraph-text-to-speech-synthesis-with-multi-step-variational-autoencoder-2308.13365"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-for-evaluation-methodologies-of-ai-generated-music-2308.13736</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-for-evaluation-methodologies-of-ai-generated-music-2308.13736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-for-evaluation-methodologies-of-ai-generated-music-2308.13736"/></url>
<url><loc>https://scifaro.com/en/abs/a-small-vocabulary-database-of-ultrasound-image-sequences-of-vocal-tract-dynamics-2308.13941</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-small-vocabulary-database-of-ultrasound-image-sequences-of-vocal-tract-dynamics-2308.13941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-small-vocabulary-database-of-ultrasound-image-sequences-of-vocal-tract-dynamics-2308.13941"/></url>
<url><loc>https://scifaro.com/en/abs/multi-subdomain-adversarial-network-for-cross-subject-eeg-based-emotion-recognition-2308.14059</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-subdomain-adversarial-network-for-cross-subject-eeg-based-emotion-recognition-2308.14059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-subdomain-adversarial-network-for-cross-subject-eeg-based-emotion-recognition-2308.14059"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-using-self-attention-based-frequency-pattern-analysis-of-machine-sounds-2308.14063</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-self-attention-based-frequency-pattern-analysis-of-machine-sounds-2308.14063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-self-attention-based-frequency-pattern-analysis-of-machine-sounds-2308.14063"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-acoustic-multi-domain-music-emotion-modeling-for-instrumental-music-2308.14317</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-acoustic-multi-domain-music-emotion-modeling-for-instrumental-music-2308.14317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-acoustic-multi-domain-music-emotion-modeling-for-instrumental-music-2308.14317"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-with-denoising-diffusion-probabilistic-gan-models-2308.14319</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-with-denoising-diffusion-probabilistic-gan-models-2308.14319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-with-denoising-diffusion-probabilistic-gan-models-2308.14319"/></url>
<url><loc>https://scifaro.com/en/abs/instructme-an-instruction-guided-music-edit-and-remix-framework-with-latent-diffusion-models-2308.14360</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instructme-an-instruction-guided-music-edit-and-remix-framework-with-latent-diffusion-models-2308.14360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instructme-an-instruction-guided-music-edit-and-remix-framework-with-latent-diffusion-models-2308.14360"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-transformer-a-novel-time-frequency-joint-learning-method-for-speech-emotion-recognition-2308.14568</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-transformer-a-novel-time-frequency-joint-learning-method-for-speech-emotion-recognition-2308.14568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-transformer-a-novel-time-frequency-joint-learning-method-for-speech-emotion-recognition-2308.14568"/></url>
<url><loc>https://scifaro.com/en/abs/pruning-self-attention-for-zero-shot-multi-speaker-text-to-speech-2308.14909</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pruning-self-attention-for-zero-shot-multi-speaker-text-to-speech-2308.14909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pruning-self-attention-for-zero-shot-multi-speaker-text-to-speech-2308.14909"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deepfake-detection-a-survey-2308.14970</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deepfake-detection-a-survey-2308.14970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deepfake-detection-a-survey-2308.14970"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-differentiable-digital-signal-processing-for-music-speech-synthesis-2308.15422</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-differentiable-digital-signal-processing-for-music-speech-synthesis-2308.15422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-differentiable-digital-signal-processing-for-music-speech-synthesis-2308.15422"/></url>
<url><loc>https://scifaro.com/en/abs/ags-an-dataset-and-taxonomy-for-domestic-scene-sound-event-recognition-2308.15726</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ags-an-dataset-and-taxonomy-for-domestic-scene-sound-event-recognition-2308.15726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ags-an-dataset-and-taxonomy-for-domestic-scene-sound-event-recognition-2308.15726"/></url>
<url><loc>https://scifaro.com/en/abs/aster-automatic-speech-recognition-system-accessibility-testing-for-stutterers-2308.15742</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aster-automatic-speech-recognition-system-accessibility-testing-for-stutterers-2308.15742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aster-automatic-speech-recognition-system-accessibility-testing-for-stutterers-2308.15742"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-transformer-based-neural-beamformer-for-target-speech-extraction-2308.15990</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-transformer-based-neural-beamformer-for-target-speech-extraction-2308.15990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-transformer-based-neural-beamformer-for-target-speech-extraction-2308.15990"/></url>
<url><loc>https://scifaro.com/en/abs/calm-contrastive-cross-modal-speaking-style-modeling-for-expressive-text-to-speech-synthesis-2308.16021</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calm-contrastive-cross-modal-speaking-style-modeling-for-expressive-text-to-speech-synthesis-2308.16021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calm-contrastive-cross-modal-speaking-style-modeling-for-expressive-text-to-speech-synthesis-2308.16021"/></url>
<url><loc>https://scifaro.com/en/abs/general-purpose-audio-effect-removal-2308.16177</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/general-purpose-audio-effect-removal-2308.16177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/general-purpose-audio-effect-removal-2308.16177"/></url>
<url><loc>https://scifaro.com/en/abs/the-biased-journey-of-msd-audio-zip-2308.16389</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-biased-journey-of-msd-audio-zip-2308.16389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-biased-journey-of-msd-audio-zip-2308.16389"/></url>
<url><loc>https://scifaro.com/en/abs/sequential-pitch-distributions-for-raga-detection-2308.16421</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequential-pitch-distributions-for-raga-detection-2308.16421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequential-pitch-distributions-for-raga-detection-2308.16421"/></url>
<url><loc>https://scifaro.com/en/abs/lightgrad-lightweight-diffusion-probabilistic-model-for-text-to-speech-2308.16569</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightgrad-lightweight-diffusion-probabilistic-model-for-text-to-speech-2308.16569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightgrad-lightweight-diffusion-probabilistic-model-for-text-to-speech-2308.16569"/></url>
<url><loc>https://scifaro.com/en/abs/improving-mandarin-prosodic-structure-prediction-with-multi-level-contextual-information-2308.16577</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-mandarin-prosodic-structure-prediction-with-multi-level-contextual-information-2308.16577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-mandarin-prosodic-structure-prediction-with-multi-level-contextual-information-2308.16577"/></url>
<url><loc>https://scifaro.com/en/abs/towards-spontaneous-style-modeling-with-semi-supervised-pre-training-for-conversational-text-to-speech-synthesis-2308.16593</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-spontaneous-style-modeling-with-semi-supervised-pre-training-for-conversational-text-to-speech-synthesis-2308.16593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-spontaneous-style-modeling-with-semi-supervised-pre-training-for-conversational-text-to-speech-synthesis-2308.16593"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-nsnet2-efficient-deep-noise-suppression-with-early-exiting-2308.16678</loc><lastmod>2024-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-nsnet2-efficient-deep-noise-suppression-with-early-exiting-2308.16678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-nsnet2-efficient-deep-noise-suppression-with-early-exiting-2308.16678"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improving-the-expressiveness-of-singing-voice-synthesis-with-bert-derived-semantic-information-2308.16836</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improving-the-expressiveness-of-singing-voice-synthesis-with-bert-derived-semantic-information-2308.16836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improving-the-expressiveness-of-singing-voice-synthesis-with-bert-derived-semantic-information-2308.16836"/></url>
<url><loc>https://scifaro.com/en/abs/qs-tts-towards-semi-supervised-text-to-speech-synthesis-via-vector-quantized-self-supervised-speech-representation-learning-2309.00126</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qs-tts-towards-semi-supervised-text-to-speech-synthesis-via-vector-quantized-self-supervised-speech-representation-learning-2309.00126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qs-tts-towards-semi-supervised-text-to-speech-synthesis-via-vector-quantized-self-supervised-speech-representation-learning-2309.00126"/></url>
<url><loc>https://scifaro.com/en/abs/improving-vision-inspired-keyword-spotting-using-dynamic-module-skipping-in-streaming-conformer-encoder-2309.00140</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-vision-inspired-keyword-spotting-using-dynamic-module-skipping-in-streaming-conformer-encoder-2309.00140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-vision-inspired-keyword-spotting-using-dynamic-module-skipping-in-streaming-conformer-encoder-2309.00140"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-the-vocal-range-of-single-speaker-singing-voice-synthesis-with-melody-unsupervised-pre-training-2309.00284</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-the-vocal-range-of-single-speaker-singing-voice-synthesis-with-melody-unsupervised-pre-training-2309.00284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-the-vocal-range-of-single-speaker-singing-voice-synthesis-with-melody-unsupervised-pre-training-2309.00284"/></url>
<url><loc>https://scifaro.com/en/abs/mi-go-test-framework-which-uses-youtube-as-data-source-for-evaluating-speech-recognition-models-like-openai-s-whisper-2309.00329</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mi-go-test-framework-which-uses-youtube-as-data-source-for-evaluating-speech-recognition-models-like-openai-s-whisper-2309.00329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mi-go-test-framework-which-uses-youtube-as-data-source-for-evaluating-speech-recognition-models-like-openai-s-whisper-2309.00329"/></url>
<url><loc>https://scifaro.com/en/abs/conette-an-efficient-audio-captioning-system-leveraging-multiple-datasets-with-task-embedding-2309.00454</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conette-an-efficient-audio-captioning-system-leveraging-multiple-datasets-with-task-embedding-2309.00454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conette-an-efficient-audio-captioning-system-leveraging-multiple-datasets-with-task-embedding-2309.00454"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-representations-for-bioacoustic-few-shot-detection-using-supervised-contrastive-learning-2309.00878</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-representations-for-bioacoustic-few-shot-detection-using-supervised-contrastive-learning-2309.00878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-representations-for-bioacoustic-few-shot-detection-using-supervised-contrastive-learning-2309.00878"/></url>
<url><loc>https://scifaro.com/en/abs/diclet-tts-diffusion-model-based-cross-lingual-emotion-transfer-for-text-to-speech-a-study-between-english-and-mandarin-2309.00883</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diclet-tts-diffusion-model-based-cross-lingual-emotion-transfer-for-text-to-speech-a-study-between-english-and-mandarin-2309.00883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diclet-tts-diffusion-model-based-cross-lingual-emotion-transfer-for-text-to-speech-a-study-between-english-and-mandarin-2309.00883"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-reserved-adversarial-attack-in-speaker-identification-2309.00929</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-reserved-adversarial-attack-in-speaker-identification-2309.00929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-reserved-adversarial-attack-in-speaker-identification-2309.00929"/></url>
<url><loc>https://scifaro.com/en/abs/nadiffuse-noise-aware-diffusion-based-model-for-speech-enhancement-2309.01212</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nadiffuse-noise-aware-diffusion-based-model-for-speech-enhancement-2309.01212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nadiffuse-noise-aware-diffusion-based-model-for-speech-enhancement-2309.01212"/></url>
<url><loc>https://scifaro.com/en/abs/mdsc-towards-evaluating-the-style-consistency-between-music-and-dance-2309.01340</loc><lastmod>2023-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdsc-towards-evaluating-the-style-consistency-between-music-and-dance-2309.01340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdsc-towards-evaluating-the-style-consistency-between-music-and-dance-2309.01340"/></url>
<url><loc>https://scifaro.com/en/abs/sememeasr-boosting-performance-of-end-to-end-speech-recognition-against-domain-and-long-tailed-data-shift-with-sememe-semantic-knowledge-2309.01437</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sememeasr-boosting-performance-of-end-to-end-speech-recognition-against-domain-and-long-tailed-data-shift-with-sememe-semantic-knowledge-2309.01437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sememeasr-boosting-performance-of-end-to-end-speech-recognition-against-domain-and-long-tailed-data-shift-with-sememe-semantic-knowledge-2309.01437"/></url>
<url><loc>https://scifaro.com/en/abs/eventtrojan-manipulating-non-intrusive-speech-quality-assessment-via-imperceptible-events-2309.01480</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eventtrojan-manipulating-non-intrusive-speech-quality-assessment-via-imperceptible-events-2309.01480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eventtrojan-manipulating-non-intrusive-speech-quality-assessment-via-imperceptible-events-2309.01480"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-methods-for-ground-truth-free-foreign-accent-conversion-2309.02133</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-methods-for-ground-truth-free-foreign-accent-conversion-2309.02133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-methods-for-ground-truth-free-foreign-accent-conversion-2309.02133"/></url>
<url><loc>https://scifaro.com/en/abs/fsd-an-initial-chinese-dataset-for-fake-song-detection-2309.02232</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fsd-an-initial-chinese-dataset-for-fake-song-detection-2309.02232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fsd-an-initial-chinese-dataset-for-fake-song-detection-2309.02232"/></url>
<url><loc>https://scifaro.com/en/abs/self-similarity-based-and-novelty-based-loss-for-music-structure-analysis-2309.02243</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-similarity-based-and-novelty-based-loss-for-music-structure-analysis-2309.02243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-similarity-based-and-novelty-based-loss-for-music-structure-analysis-2309.02243"/></url>
<url><loc>https://scifaro.com/en/abs/the-batik-plays-mozart-corpus-linking-performance-to-score-to-musicological-annotations-2309.02399</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-batik-plays-mozart-corpus-linking-performance-to-score-to-musicological-annotations-2309.02399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-batik-plays-mozart-corpus-linking-performance-to-score-to-musicological-annotations-2309.02399"/></url>
<url><loc>https://scifaro.com/en/abs/voice-morphing-two-identities-in-one-voice-2309.02404</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-morphing-two-identities-in-one-voice-2309.02404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-morphing-two-identities-in-one-voice-2309.02404"/></url>
<url><loc>https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-speech-recognition-through-down-sampling-acoustic-representation-2309.02459</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-speech-recognition-through-down-sampling-acoustic-representation-2309.02459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-speech-recognition-through-down-sampling-acoustic-representation-2309.02459"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-with-band-split-rope-transformer-2309.02612</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-with-band-split-rope-transformer-2309.02612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-with-band-split-rope-transformer-2309.02612"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-measurement-of-multiple-acoustic-attributes-using-structured-periodic-test-signals-including-music-and-other-sound-materials-2309.02767</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-measurement-of-multiple-acoustic-attributes-using-structured-periodic-test-signals-including-music-and-other-sound-materials-2309.02767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-measurement-of-multiple-acoustic-attributes-using-structured-periodic-test-signals-including-music-and-other-sound-materials-2309.02767"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-disentanglement-of-harmonic-and-rhythmic-features-in-music-audio-signals-2309.02796</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-disentanglement-of-harmonic-and-rhythmic-features-in-music-audio-signals-2309.02796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-disentanglement-of-harmonic-and-rhythmic-features-in-music-audio-signals-2309.02796"/></url>
<url><loc>https://scifaro.com/en/abs/bigvsan-enhancing-gan-based-neural-vocoders-with-slicing-adversarial-network-2309.02836</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigvsan-enhancing-gan-based-neural-vocoders-with-slicing-adversarial-network-2309.02836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigvsan-enhancing-gan-based-neural-vocoders-with-slicing-adversarial-network-2309.02836"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-temporary-deepfake-location-approach-based-embeddings-for-partially-spoofed-audio-detection-2309.03036</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-temporary-deepfake-location-approach-based-embeddings-for-partially-spoofed-audio-detection-2309.03036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-temporary-deepfake-location-approach-based-embeddings-for-partially-spoofed-audio-detection-2309.03036"/></url>
<url><loc>https://scifaro.com/en/abs/presenting-the-swtc-a-symbolic-corpus-of-themes-from-john-williams-star-wars-episodes-i-ix-2309.03298</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/presenting-the-swtc-a-symbolic-corpus-of-themes-from-john-williams-star-wars-episodes-i-ix-2309.03298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/presenting-the-swtc-a-symbolic-corpus-of-themes-from-john-williams-star-wars-episodes-i-ix-2309.03298"/></url>
<url><loc>https://scifaro.com/en/abs/highly-controllable-diffusion-based-any-to-any-voice-conversion-model-with-frame-level-prosody-feature-2309.03364</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/highly-controllable-diffusion-based-any-to-any-voice-conversion-model-with-frame-level-prosody-feature-2309.03364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/highly-controllable-diffusion-based-any-to-any-voice-conversion-model-with-frame-level-prosody-feature-2309.03364"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-sound-recognition-for-efficient-underwater-data-analysis-2309.03451</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-sound-recognition-for-efficient-underwater-data-analysis-2309.03451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-sound-recognition-for-efficient-underwater-data-analysis-2309.03451"/></url>
<url><loc>https://scifaro.com/en/abs/topological-fingerprints-for-audio-identification-2309.03516</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topological-fingerprints-for-audio-identification-2309.03516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topological-fingerprints-for-audio-identification-2309.03516"/></url>
<url><loc>https://scifaro.com/en/abs/mvd-a-novel-methodology-and-dataset-for-acoustic-vehicle-type-classification-2309.03544</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mvd-a-novel-methodology-and-dataset-for-acoustic-vehicle-type-classification-2309.03544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mvd-a-novel-methodology-and-dataset-for-acoustic-vehicle-type-classification-2309.03544"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-self-supervised-learning-of-speech-representation-via-invariance-and-redundancy-reduction-2309.03619</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-self-supervised-learning-of-speech-representation-via-invariance-and-redundancy-reduction-2309.03619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-self-supervised-learning-of-speech-representation-via-invariance-and-redundancy-reduction-2309.03619"/></url>
<url><loc>https://scifaro.com/en/abs/spiking-structured-state-space-model-for-monaural-speech-enhancement-2309.03641</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spiking-structured-state-space-model-for-monaural-speech-enhancement-2309.03641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spiking-structured-state-space-model-for-monaural-speech-enhancement-2309.03641"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-captioning-via-audibility-guidance-2309.03884</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-via-audibility-guidance-2309.03884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-via-audibility-guidance-2309.03884"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-automatic-audiobook-creation-2309.03926</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-automatic-audiobook-creation-2309.03926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-automatic-audiobook-creation-2309.03926"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-speech-codec-for-noise-robust-speech-coding-2309.04132</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-speech-codec-for-noise-robust-speech-coding-2309.04132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-speech-codec-for-noise-robust-speech-coding-2309.04132"/></url>
<url><loc>https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-speech-generation-2309.04156</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-speech-generation-2309.04156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-utterance-conditioned-vae-for-speech-generation-2309.04156"/></url>
<url><loc>https://scifaro.com/en/abs/a-long-tail-friendly-representation-framework-for-artist-and-music-similarity-2309.04182</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-long-tail-friendly-representation-framework-for-artist-and-music-similarity-2309.04182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-long-tail-friendly-representation-framework-for-artist-and-music-similarity-2309.04182"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-and-limited-data-voice-conversion-using-stochastic-variational-deep-kernel-learning-2309.04420</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-and-limited-data-voice-conversion-using-stochastic-variational-deep-kernel-learning-2309.04420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-and-limited-data-voice-conversion-using-stochastic-variational-deep-kernel-learning-2309.04420"/></url>
<url><loc>https://scifaro.com/en/abs/covid-19-detection-system-a-comparative-analysis-of-system-performance-based-on-acoustic-features-of-cough-audio-signals-2309.04505</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covid-19-detection-system-a-comparative-analysis-of-system-performance-based-on-acoustic-features-of-cough-audio-signals-2309.04505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covid-19-detection-system-a-comparative-analysis-of-system-performance-based-on-acoustic-features-of-cough-audio-signals-2309.04505"/></url>
<url><loc>https://scifaro.com/en/abs/the-power-of-sound-tpos-audio-reactive-video-generation-with-stable-diffusion-2309.04509</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-power-of-sound-tpos-audio-reactive-video-generation-with-stable-diffusion-2309.04509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-power-of-sound-tpos-audio-reactive-video-generation-with-stable-diffusion-2309.04509"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-domain-specific-enhancements-for-a-neural-foley-synthesizer-2309.04641</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-domain-specific-enhancements-for-a-neural-foley-synthesizer-2309.04641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-domain-specific-enhancements-for-a-neural-foley-synthesizer-2309.04641"/></url>
<url><loc>https://scifaro.com/en/abs/mask-ctc-based-encoder-pre-training-for-streaming-end-to-end-speech-recognition-2309.04654</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-ctc-based-encoder-pre-training-for-streaming-end-to-end-speech-recognition-2309.04654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-ctc-based-encoder-pre-training-for-streaming-end-to-end-speech-recognition-2309.04654"/></url>
<url><loc>https://scifaro.com/en/abs/audrandaug-random-image-augmentations-for-audio-classification-2309.04762</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audrandaug-random-image-augmentations-for-audio-classification-2309.04762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audrandaug-random-image-augmentations-for-audio-classification-2309.04762"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-music-genre-classification-algorithm-analysis-and-deployment-architecture-2309.04861</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-music-genre-classification-algorithm-analysis-and-deployment-architecture-2309.04861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-music-genre-classification-algorithm-analysis-and-deployment-architecture-2309.04861"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-emotional-adaptation-for-audio-driven-talking-head-generation-2309.04946</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-emotional-adaptation-for-audio-driven-talking-head-generation-2309.04946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-emotional-adaptation-for-audio-driven-talking-head-generation-2309.04946"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-fish-feeding-intensity-assessment-in-aquaculture-2309.05058</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-fish-feeding-intensity-assessment-in-aquaculture-2309.05058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-fish-feeding-intensity-assessment-in-aquaculture-2309.05058"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-feature-imbalance-in-sound-source-separation-2309.05287</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-feature-imbalance-in-sound-source-separation-2309.05287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-feature-imbalance-in-sound-source-separation-2309.05287"/></url>
<url><loc>https://scifaro.com/en/abs/edac-efficient-deployment-of-audio-classification-models-for-covid-19-detection-2309.05357</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edac-efficient-deployment-of-audio-classification-models-for-covid-19-detection-2309.05357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edac-efficient-deployment-of-audio-classification-models-for-covid-19-detection-2309.05357"/></url>
<url><loc>https://scifaro.com/en/abs/slidespeech-a-large-scale-slide-enriched-audio-visual-corpus-2309.05396</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slidespeech-a-large-scale-slide-enriched-audio-visual-corpus-2309.05396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slidespeech-a-large-scale-slide-enriched-audio-visual-corpus-2309.05396"/></url>
<url><loc>https://scifaro.com/en/abs/undecidability-results-and-their-relevance-in-modern-music-making-2309.05595</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/undecidability-results-and-their-relevance-in-modern-music-making-2309.05595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/undecidability-results-and-their-relevance-in-modern-music-making-2309.05595"/></url>
<url><loc>https://scifaro.com/en/abs/kernel-interpolation-of-incident-sound-field-in-region-including-scattering-objects-2309.05634</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kernel-interpolation-of-incident-sound-field-in-region-including-scattering-objects-2309.05634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kernel-interpolation-of-incident-sound-field-in-region-including-scattering-objects-2309.05634"/></url>
<url><loc>https://scifaro.com/en/abs/natural-language-supervision-for-general-purpose-audio-representations-2309.05767</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/natural-language-supervision-for-general-purpose-audio-representations-2309.05767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/natural-language-supervision-for-general-purpose-audio-representations-2309.05767"/></url>
<url><loc>https://scifaro.com/en/abs/synvox2-towards-a-privacy-friendly-voxceleb2-dataset-2309.06141</loc><lastmod>2023-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synvox2-towards-a-privacy-friendly-voxceleb2-dataset-2309.06141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synvox2-towards-a-privacy-friendly-voxceleb2-dataset-2309.06141"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-modelling-of-percussive-audio-with-transient-and-spectral-synthesis-2309.06649</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-modelling-of-percussive-audio-with-transient-and-spectral-synthesis-2309.06649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-modelling-of-percussive-audio-with-transient-and-spectral-synthesis-2309.06649"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-encoder-decoder-end-to-end-neural-diarization-with-embedding-enhancer-2309.06672</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-encoder-decoder-end-to-end-neural-diarization-with-embedding-enhancer-2309.06672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-encoder-decoder-end-to-end-neural-diarization-with-embedding-enhancer-2309.06672"/></url>
<url><loc>https://scifaro.com/en/abs/piave-a-pose-invariant-audio-visual-speaker-extraction-network-2309.06723</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/piave-a-pose-invariant-audio-visual-speaker-extraction-network-2309.06723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/piave-a-pose-invariant-audio-visual-speaker-extraction-network-2309.06723"/></url>
<url><loc>https://scifaro.com/en/abs/distinguishing-neural-speech-synthesis-models-through-fingerprints-in-speech-waveforms-2309.06780</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distinguishing-neural-speech-synthesis-models-through-fingerprints-in-speech-waveforms-2309.06780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distinguishing-neural-speech-synthesis-models-through-fingerprints-in-speech-waveforms-2309.06780"/></url>
<url><loc>https://scifaro.com/en/abs/dctts-discrete-diffusion-model-with-contrastive-learning-for-text-to-speech-generation-2309.06787</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dctts-discrete-diffusion-model-with-contrastive-learning-for-text-to-speech-generation-2309.06787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dctts-discrete-diffusion-model-with-contrastive-learning-for-text-to-speech-generation-2309.06787"/></url>
<url><loc>https://scifaro.com/en/abs/emalg-an-enhanced-mandarin-lombard-grid-corpus-with-meaningful-sentences-2309.06858</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emalg-an-enhanced-mandarin-lombard-grid-corpus-with-meaningful-sentences-2309.06858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emalg-an-enhanced-mandarin-lombard-grid-corpus-with-meaningful-sentences-2309.06858"/></url>
<url><loc>https://scifaro.com/en/abs/getting-more-for-less-using-weak-labels-and-av-mixup-for-robust-audio-visual-speaker-verification-2309.07115</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/getting-more-for-less-using-weak-labels-and-av-mixup-for-robust-audio-visual-speaker-verification-2309.07115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/getting-more-for-less-using-weak-labels-and-av-mixup-for-robust-audio-visual-speaker-verification-2309.07115"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-models-for-audio-semantic-communication-2309.07195</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-models-for-audio-semantic-communication-2309.07195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-models-for-audio-semantic-communication-2309.07195"/></url>
<url><loc>https://scifaro.com/en/abs/audiosr-versatile-audio-super-resolution-at-scale-2309.07314</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiosr-versatile-audio-super-resolution-at-scale-2309.07314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiosr-versatile-audio-super-resolution-at-scale-2309.07314"/></url>
<url><loc>https://scifaro.com/en/abs/encodecmae-leveraging-neural-codecs-for-universal-audio-representation-learning-2309.07391</loc><lastmod>2024-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/encodecmae-leveraging-neural-codecs-for-universal-audio-representation-learning-2309.07391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/encodecmae-leveraging-neural-codecs-for-universal-audio-representation-learning-2309.07391"/></url>
<url><loc>https://scifaro.com/en/abs/funcodec-a-fundamental-reproducible-and-integrable-open-source-toolkit-for-neural-speech-codec-2309.07405</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/funcodec-a-fundamental-reproducible-and-integrable-open-source-toolkit-for-neural-speech-codec-2309.07405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/funcodec-a-fundamental-reproducible-and-integrable-open-source-toolkit-for-neural-speech-codec-2309.07405"/></url>
<url><loc>https://scifaro.com/en/abs/banc-towards-efficient-binaural-audio-neural-codec-for-overlapping-speech-2309.07416</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/banc-towards-efficient-binaural-audio-neural-codec-for-overlapping-speech-2309.07416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/banc-towards-efficient-binaural-audio-neural-codec-for-overlapping-speech-2309.07416"/></url>
<url><loc>https://scifaro.com/en/abs/mandarin-lombard-flavor-classification-2309.07419</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mandarin-lombard-flavor-classification-2309.07419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mandarin-lombard-flavor-classification-2309.07419"/></url>
<url><loc>https://scifaro.com/en/abs/spatialcodec-neural-spatial-speech-coding-2309.07432</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatialcodec-neural-spatial-speech-coding-2309.07432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatialcodec-neural-spatial-speech-coding-2309.07432"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-speech-separation-performance-degradation-on-emotional-speech-mixtures-2309.07458</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-speech-separation-performance-degradation-on-emotional-speech-mixtures-2309.07458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-speech-separation-performance-degradation-on-emotional-speech-mixtures-2309.07458"/></url>
<url><loc>https://scifaro.com/en/abs/outlier-aware-inlier-modeling-and-multi-scale-scoring-for-anomalous-sound-detection-via-multitask-learning-2309.07500</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/outlier-aware-inlier-modeling-and-multi-scale-scoring-for-anomalous-sound-detection-via-multitask-learning-2309.07500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/outlier-aware-inlier-modeling-and-multi-scale-scoring-for-anomalous-sound-detection-via-multitask-learning-2309.07500"/></url>
<url><loc>https://scifaro.com/en/abs/singfake-singing-voice-deepfake-detection-2309.07525</loc><lastmod>2026-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singfake-singing-voice-deepfake-detection-2309.07525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singfake-singing-voice-deepfake-detection-2309.07525"/></url>
<url><loc>https://scifaro.com/en/abs/speech-to-speech-translation-with-discrete-unit-based-style-transfer-2309.07566</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-to-speech-translation-with-discrete-unit-based-style-transfer-2309.07566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-to-speech-translation-with-discrete-unit-based-style-transfer-2309.07566"/></url>
<url><loc>https://scifaro.com/en/abs/aas-vc-on-the-generalization-ability-of-automatic-alignment-search-based-non-autoregressive-sequence-to-sequence-voice-conversion-2309.07598</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aas-vc-on-the-generalization-ability-of-automatic-alignment-search-based-non-autoregressive-sequence-to-sequence-voice-conversion-2309.07598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aas-vc-on-the-generalization-ability-of-automatic-alignment-search-based-non-autoregressive-sequence-to-sequence-voice-conversion-2309.07598"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-audio-captioning-using-machine-translated-data-2309.07615</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-audio-captioning-using-machine-translated-data-2309.07615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-audio-captioning-using-machine-translated-data-2309.07615"/></url>
<url><loc>https://scifaro.com/en/abs/ddsp-based-neural-waveform-synthesis-of-polyphonic-guitar-performance-from-string-wise-midi-input-2309.07658</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddsp-based-neural-waveform-synthesis-of-polyphonic-guitar-performance-from-string-wise-midi-input-2309.07658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddsp-based-neural-waveform-synthesis-of-polyphonic-guitar-performance-from-string-wise-midi-input-2309.07658"/></url>
<url><loc>https://scifaro.com/en/abs/echotune-a-modular-extractor-leveraging-the-variable-length-nature-of-speech-in-asr-tasks-2309.07765</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echotune-a-modular-extractor-leveraging-the-variable-length-nature-of-speech-in-asr-tasks-2309.07765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echotune-a-modular-extractor-leveraging-the-variable-length-nature-of-speech-in-asr-tasks-2309.07765"/></url>
<url><loc>https://scifaro.com/en/abs/ciwagan-articulatory-information-exchange-2309.07861</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ciwagan-articulatory-information-exchange-2309.07861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ciwagan-articulatory-information-exchange-2309.07861"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-assessment-of-markov-models-and-recurrent-neural-networks-for-jazz-music-generation-2309.08027</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-assessment-of-markov-models-and-recurrent-neural-networks-for-jazz-music-generation-2309.08027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-assessment-of-markov-models-and-recurrent-neural-networks-for-jazz-music-generation-2309.08027"/></url>
<url><loc>https://scifaro.com/en/abs/voicepat-an-efficient-open-source-evaluation-toolkit-for-voice-privacy-research-2309.08049</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicepat-an-efficient-open-source-evaluation-toolkit-for-voice-privacy-research-2309.08049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicepat-an-efficient-open-source-evaluation-toolkit-for-voice-privacy-research-2309.08049"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-text-to-audio-generation-2309.08051</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-text-to-audio-generation-2309.08051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-text-to-audio-generation-2309.08051"/></url>
<url><loc>https://scifaro.com/en/abs/ssl-net-a-synergistic-spectral-and-learning-based-network-for-efficient-bird-sound-classification-2309.08072</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssl-net-a-synergistic-spectral-and-learning-based-network-for-efficient-bird-sound-classification-2309.08072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssl-net-a-synergistic-spectral-and-learning-based-network-for-efficient-bird-sound-classification-2309.08072"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-the-temporal-dynamics-of-universal-speech-representations-for-generalizable-deepfake-detection-2309.08099</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-the-temporal-dynamics-of-universal-speech-representations-for-generalizable-deepfake-detection-2309.08099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-the-temporal-dynamics-of-universal-speech-representations-for-generalizable-deepfake-detection-2309.08099"/></url>
<url><loc>https://scifaro.com/en/abs/foundation-model-assisted-automatic-speech-emotion-recognition-transcribing-annotating-and-augmenting-2309.08108</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foundation-model-assisted-automatic-speech-emotion-recognition-transcribing-annotating-and-augmenting-2309.08108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foundation-model-assisted-automatic-speech-emotion-recognition-transcribing-annotating-and-augmenting-2309.08108"/></url>
<url><loc>https://scifaro.com/en/abs/diversity-based-core-set-selection-for-text-to-speech-with-linguistic-and-acoustic-features-2309.08127</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diversity-based-core-set-selection-for-text-to-speech-with-linguistic-and-acoustic-features-2309.08127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diversity-based-core-set-selection-for-text-to-speech-with-linguistic-and-acoustic-features-2309.08127"/></url>
<url><loc>https://scifaro.com/en/abs/two-step-knowledge-distillation-for-tiny-speech-enhancement-2309.08144</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-step-knowledge-distillation-for-tiny-speech-enhancement-2309.08144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-step-knowledge-distillation-for-tiny-speech-enhancement-2309.08144"/></url>
<url><loc>https://scifaro.com/en/abs/syn-att-synthetic-speech-attribution-via-semi-supervised-unknown-multi-class-ensemble-of-cnns-2309.08146</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syn-att-synthetic-speech-attribution-via-semi-supervised-unknown-multi-class-ensemble-of-cnns-2309.08146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syn-att-synthetic-speech-attribution-via-semi-supervised-unknown-multi-class-ensemble-of-cnns-2309.08146"/></url>
<url><loc>https://scifaro.com/en/abs/residual-speaker-representation-for-one-shot-voice-conversion-2309.08166</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-speaker-representation-for-one-shot-voice-conversion-2309.08166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-speaker-representation-for-one-shot-voice-conversion-2309.08166"/></url>
<url><loc>https://scifaro.com/en/abs/tf-sepnet-an-efficient-1d-kernel-design-in-cnns-for-low-complexity-acoustic-scene-classification-2309.08200</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tf-sepnet-an-efficient-1d-kernel-design-in-cnns-for-low-complexity-acoustic-scene-classification-2309.08200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tf-sepnet-an-efficient-1d-kernel-design-in-cnns-for-low-complexity-acoustic-scene-classification-2309.08200"/></url>
<url><loc>https://scifaro.com/en/abs/hm-conformer-a-conformer-based-audio-deepfake-detection-system-with-hierarchical-pooling-and-multi-level-classification-token-aggregation-methods-2309.08208</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hm-conformer-a-conformer-based-audio-deepfake-detection-system-with-hierarchical-pooling-and-multi-level-classification-token-aggregation-methods-2309.08208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hm-conformer-a-conformer-based-audio-deepfake-detection-system-with-hierarchical-pooling-and-multi-level-classification-token-aggregation-methods-2309.08208"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-meta-information-for-audio-based-zero-shot-bird-classification-2309.08398</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-meta-information-for-audio-based-zero-shot-bird-classification-2309.08398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-meta-information-for-audio-based-zero-shot-bird-classification-2309.08398"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-active-speaker-extraction-for-sparsely-overlapped-multi-talker-speech-2309.08408</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-active-speaker-extraction-for-sparsely-overlapped-multi-talker-speech-2309.08408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-active-speaker-extraction-for-sparsely-overlapped-multi-talker-speech-2309.08408"/></url>
<url><loc>https://scifaro.com/en/abs/diverse-audio-embeddings-bringing-features-back-outperforms-clap-2309.08751</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diverse-audio-embeddings-bringing-features-back-outperforms-clap-2309.08751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diverse-audio-embeddings-bringing-features-back-outperforms-clap-2309.08751"/></url>
<url><loc>https://scifaro.com/en/abs/enhance-audio-generation-controllability-through-representation-similarity-regularization-2309.08773</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhance-audio-generation-controllability-through-representation-similarity-regularization-2309.08773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhance-audio-generation-controllability-through-representation-similarity-regularization-2309.08773"/></url>
<url><loc>https://scifaro.com/en/abs/fastgraphtts-an-ultrafast-syntax-aware-speech-synthesis-framework-2309.08837</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastgraphtts-an-ultrafast-syntax-aware-speech-synthesis-framework-2309.08837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastgraphtts-an-ultrafast-syntax-aware-speech-synthesis-framework-2309.08837"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-latent-space-reconstruction-learning-for-audio-text-retrieval-2309.08839</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-latent-space-reconstruction-learning-for-audio-text-retrieval-2309.08839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-latent-space-reconstruction-learning-for-audio-text-retrieval-2309.08839"/></url>
<url><loc>https://scifaro.com/en/abs/regularized-contrastive-pre-training-for-few-shot-bioacoustic-sound-detection-2309.08971</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regularized-contrastive-pre-training-for-few-shot-bioacoustic-sound-detection-2309.08971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regularized-contrastive-pre-training-for-few-shot-bioacoustic-sound-detection-2309.08971"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-based-on-generative-adversarial-networks-with-transformer-2309.09075</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-based-on-generative-adversarial-networks-with-transformer-2309.09075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-based-on-generative-adversarial-networks-with-transformer-2309.09075"/></url>
<url><loc>https://scifaro.com/en/abs/synthtab-leveraging-synthesized-data-for-guitar-tablature-transcription-2309.09085</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthtab-leveraging-synthesized-data-for-guitar-tablature-transcription-2309.09085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthtab-leveraging-synthesized-data-for-guitar-tablature-transcription-2309.09085"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-gan-based-vocoders-with-contrastive-learning-under-data-limited-condition-2309.09088</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-gan-based-vocoders-with-contrastive-learning-under-data-limited-condition-2309.09088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-gan-based-vocoders-with-contrastive-learning-under-data-limited-condition-2309.09088"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-quantised-end-to-end-asr-models-via-personalisation-2309.09136</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-quantised-end-to-end-asr-models-via-personalisation-2309.09136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-quantised-end-to-end-asr-models-via-personalisation-2309.09136"/></url>
<url><loc>https://scifaro.com/en/abs/zero-and-few-shot-sound-event-localization-and-detection-2309.09223</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-and-few-shot-sound-event-localization-and-detection-2309.09223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-and-few-shot-sound-event-localization-and-detection-2309.09223"/></url>
<url><loc>https://scifaro.com/en/abs/sound-source-distance-estimation-in-diverse-and-dynamic-acoustic-conditions-2309.09288</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-source-distance-estimation-in-diverse-and-dynamic-acoustic-conditions-2309.09288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-source-distance-estimation-in-diverse-and-dynamic-acoustic-conditions-2309.09288"/></url>
<url><loc>https://scifaro.com/en/abs/a-few-shot-approach-to-dysarthric-speech-intelligibility-level-classification-using-transformers-2309.09329</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-few-shot-approach-to-dysarthric-speech-intelligibility-level-classification-using-transformers-2309.09329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-few-shot-approach-to-dysarthric-speech-intelligibility-level-classification-using-transformers-2309.09329"/></url>
<url><loc>https://scifaro.com/en/abs/are-soft-prompts-good-zero-shot-learners-for-speech-recognition-2309.09413</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-soft-prompts-good-zero-shot-learners-for-speech-recognition-2309.09413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-soft-prompts-good-zero-shot-learners-for-speech-recognition-2309.09413"/></url>
<url><loc>https://scifaro.com/en/abs/spiking-leaf-a-learnable-auditory-front-end-for-spiking-neural-networks-2309.09469</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spiking-leaf-a-learnable-auditory-front-end-for-spiking-neural-networks-2309.09469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spiking-leaf-a-learnable-auditory-front-end-for-spiking-neural-networks-2309.09469"/></url>
<url><loc>https://scifaro.com/en/abs/face-driven-zero-shot-voice-conversion-with-memory-based-face-voice-alignment-2309.09470</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/face-driven-zero-shot-voice-conversion-with-memory-based-face-voice-alignment-2309.09470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/face-driven-zero-shot-voice-conversion-with-memory-based-face-voice-alignment-2309.09470"/></url>
<url><loc>https://scifaro.com/en/abs/humtrans-a-novel-open-source-dataset-for-humming-melody-transcription-and-beyond-2309.09623</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/humtrans-a-novel-open-source-dataset-for-humming-melody-transcription-and-beyond-2309.09623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/humtrans-a-novel-open-source-dataset-for-humming-melody-transcription-and-beyond-2309.09623"/></url>
<url><loc>https://scifaro.com/en/abs/electrolaryngeal-speech-intelligibility-enhancement-through-robust-linguistic-encoders-2309.09627</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/electrolaryngeal-speech-intelligibility-enhancement-through-robust-linguistic-encoders-2309.09627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/electrolaryngeal-speech-intelligibility-enhancement-through-robust-linguistic-encoders-2309.09627"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-by-unrolling-diffusion-process-using-neural-network-layers-2309.09652</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-by-unrolling-diffusion-process-using-neural-network-layers-2309.09652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-by-unrolling-diffusion-process-using-neural-network-layers-2309.09652"/></url>
<url><loc>https://scifaro.com/en/abs/synth-ac-enhancing-audio-captioning-with-synthetic-supervision-2309.09705</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synth-ac-enhancing-audio-captioning-with-synthetic-supervision-2309.09705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synth-ac-enhancing-audio-captioning-with-synthetic-supervision-2309.09705"/></url>
<url><loc>https://scifaro.com/en/abs/frame-to-utterance-convergence-a-spectra-temporal-approach-for-unified-spoofing-detection-2309.09837</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-to-utterance-convergence-a-spectra-temporal-approach-for-unified-spoofing-detection-2309.09837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-to-utterance-convergence-a-spectra-temporal-approach-for-unified-spoofing-detection-2309.09837"/></url>
<url><loc>https://scifaro.com/en/abs/crowdotic-a-privacy-preserving-hospital-waiting-room-crowd-density-estimation-with-non-speech-audio-2309.10280</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdotic-a-privacy-preserving-hospital-waiting-room-crowd-density-estimation-with-non-speech-audio-2309.10280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdotic-a-privacy-preserving-hospital-waiting-room-crowd-density-estimation-with-non-speech-audio-2309.10280"/></url>
<url><loc>https://scifaro.com/en/abs/pdpcrn-parallel-dual-path-crn-with-bi-directional-inter-branch-interactions-for-multi-channel-speech-enhancement-2309.10379</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pdpcrn-parallel-dual-path-crn-with-bi-directional-inter-branch-interactions-for-multi-channel-speech-enhancement-2309.10379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pdpcrn-parallel-dual-path-crn-with-bi-directional-inter-branch-interactions-for-multi-channel-speech-enhancement-2309.10379"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-modeling-of-spatial-cues-via-spherical-harmonics-for-multi-channel-speech-enhancement-2309.10393</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-modeling-of-spatial-cues-via-spherical-harmonics-for-multi-channel-speech-enhancement-2309.10393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-modeling-of-spatial-cues-via-spherical-harmonics-for-multi-channel-speech-enhancement-2309.10393"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-diarization-using-semantic-information-joint-pairwise-constraints-propagation-2309.10456</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-diarization-using-semantic-information-joint-pairwise-constraints-propagation-2309.10456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-diarization-using-semantic-information-joint-pairwise-constraints-propagation-2309.10456"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-sentence-type-effects-on-the-lombard-effect-and-intelligibility-enhancement-a-comparative-study-of-natural-and-grid-sentences-2309.10485</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-sentence-type-effects-on-the-lombard-effect-and-intelligibility-enhancement-a-comparative-study-of-natural-and-grid-sentences-2309.10485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-sentence-type-effects-on-the-lombard-effect-and-intelligibility-enhancement-a-comparative-study-of-natural-and-grid-sentences-2309.10485"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-spoof-gap-a-unified-parallel-aggregation-network-for-voice-presentation-attacks-2309.10560</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-spoof-gap-a-unified-parallel-aggregation-network-for-voice-presentation-attacks-2309.10560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-spoof-gap-a-unified-parallel-aggregation-network-for-voice-presentation-attacks-2309.10560"/></url>
<url><loc>https://scifaro.com/en/abs/motif-centric-representation-learning-for-symbolic-music-2309.10597</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/motif-centric-representation-learning-for-symbolic-music-2309.10597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/motif-centric-representation-learning-for-symbolic-music-2309.10597"/></url>
<url><loc>https://scifaro.com/en/abs/used-universal-speaker-extraction-and-diarization-2309.10674</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/used-universal-speaker-extraction-and-diarization-2309.10674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/used-universal-speaker-extraction-and-diarization-2309.10674"/></url>
<url><loc>https://scifaro.com/en/abs/harmony-and-duality-an-introduction-to-music-theory-2309.10719</loc><lastmod>2026-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmony-and-duality-an-introduction-to-music-theory-2309.10719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmony-and-duality-an-introduction-to-music-theory-2309.10719"/></url>
<url><loc>https://scifaro.com/en/abs/melodyglm-multi-task-pre-training-for-symbolic-melody-generation-2309.10738</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melodyglm-multi-task-pre-training-for-symbolic-melody-generation-2309.10738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melodyglm-multi-task-pre-training-for-symbolic-melody-generation-2309.10738"/></url>
<url><loc>https://scifaro.com/en/abs/consistencytta-accelerating-diffusion-based-text-to-audio-generation-with-consistency-distillation-2309.10740</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistencytta-accelerating-diffusion-based-text-to-audio-generation-with-consistency-distillation-2309.10740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistencytta-accelerating-diffusion-based-text-to-audio-generation-with-consistency-distillation-2309.10740"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-multi-channel-speech-enhancement-with-spherical-harmonics-injection-for-directional-encoding-2309.10832</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-multi-channel-speech-enhancement-with-spherical-harmonics-injection-for-directional-encoding-2309.10832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-multi-channel-speech-enhancement-with-spherical-harmonics-injection-for-directional-encoding-2309.10832"/></url>
<url><loc>https://scifaro.com/en/abs/test-time-training-for-speech-2309.10930</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/test-time-training-for-speech-2309.10930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/test-time-training-for-speech-2309.10930"/></url>
<url><loc>https://scifaro.com/en/abs/directional-source-separation-for-robust-speech-recognition-on-smart-glasses-2309.10993</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-source-separation-for-robust-speech-recognition-on-smart-glasses-2309.10993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-source-separation-for-robust-speech-recognition-on-smart-glasses-2309.10993"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-personalization-methods-in-text-to-music-generation-2309.11140</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-personalization-methods-in-text-to-music-generation-2309.11140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-personalization-methods-in-text-to-music-generation-2309.11140"/></url>
<url><loc>https://scifaro.com/en/abs/auto-acd-a-large-scale-dataset-for-audio-language-representation-learning-2309.11500</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-acd-a-large-scale-dataset-for-audio-language-representation-learning-2309.11500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-acd-a-large-scale-dataset-for-audio-language-representation-learning-2309.11500"/></url>
<url><loc>https://scifaro.com/en/abs/fluenteditor-text-based-speech-editing-by-considering-acoustic-and-prosody-consistency-2309.11725</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fluenteditor-text-based-speech-editing-by-considering-acoustic-and-prosody-consistency-2309.11725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fluenteditor-text-based-speech-editing-by-considering-acoustic-and-prosody-consistency-2309.11725"/></url>
<url><loc>https://scifaro.com/en/abs/tmac-temporal-multi-modal-graph-learning-for-acoustic-event-classification-2309.11845</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tmac-temporal-multi-modal-graph-learning-for-acoustic-event-classification-2309.11845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tmac-temporal-multi-modal-graph-learning-for-acoustic-event-classification-2309.11845"/></url>
<url><loc>https://scifaro.com/en/abs/a-discourse-level-multi-scale-prosodic-model-for-fine-grained-emotion-analysis-2309.11849</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-discourse-level-multi-scale-prosodic-model-for-fine-grained-emotion-analysis-2309.11849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-discourse-level-multi-scale-prosodic-model-for-fine-grained-emotion-analysis-2309.11849"/></url>
<url><loc>https://scifaro.com/en/abs/audio-contrastive-based-fine-tuning-decoupling-representation-learning-and-classification-2309.11895</loc><lastmod>2025-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-contrastive-based-fine-tuning-decoupling-representation-learning-and-classification-2309.11895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-contrastive-based-fine-tuning-decoupling-representation-learning-and-classification-2309.11895"/></url>
<url><loc>https://scifaro.com/en/abs/improving-language-model-based-zero-shot-text-to-speech-synthesis-with-multi-scale-acoustic-prompts-2309.11977</loc><lastmod>2024-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-language-model-based-zero-shot-text-to-speech-synthesis-with-multi-scale-acoustic-prompts-2309.11977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-language-model-based-zero-shot-text-to-speech-synthesis-with-multi-scale-acoustic-prompts-2309.11977"/></url>
<url><loc>https://scifaro.com/en/abs/passage-summarization-with-recurrent-models-for-audio-sheet-music-retrieval-2309.12111</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/passage-summarization-with-recurrent-models-for-audio-sheet-music-retrieval-2309.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/passage-summarization-with-recurrent-models-for-audio-sheet-music-retrieval-2309.12111"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-robust-audio-sheet-music-retrieval-systems-2309.12134</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-robust-audio-sheet-music-retrieval-systems-2309.12134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-robust-audio-sheet-music-retrieval-systems-2309.12134"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-and-truly-large-scale-audio-sheet-music-retrieval-2309.12158</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-and-truly-large-scale-audio-sheet-music-retrieval-2309.12158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-and-truly-large-scale-audio-sheet-music-retrieval-2309.12158"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-automated-audio-captioning-via-text-only-training-2309.12242</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-automated-audio-captioning-via-text-only-training-2309.12242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-automated-audio-captioning-via-text-only-training-2309.12242"/></url>
<url><loc>https://scifaro.com/en/abs/performance-conditioning-for-diffusion-based-multi-instrument-music-synthesis-2309.12283</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-conditioning-for-diffusion-based-multi-instrument-music-synthesis-2309.12283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-conditioning-for-diffusion-based-multi-instrument-music-synthesis-2309.12283"/></url>
<url><loc>https://scifaro.com/en/abs/profile-error-tolerant-target-speaker-voice-activity-detection-2309.12521</loc><lastmod>2024-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/profile-error-tolerant-target-speaker-voice-activity-detection-2309.12521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/profile-error-tolerant-target-speaker-voice-activity-detection-2309.12521"/></url>
<url><loc>https://scifaro.com/en/abs/crosssinger-a-cross-lingual-multi-singer-high-fidelity-singing-voice-synthesizer-trained-on-monolingual-singers-2309.12672</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crosssinger-a-cross-lingual-multi-singer-high-fidelity-singing-voice-synthesizer-trained-on-monolingual-singers-2309.12672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crosssinger-a-cross-lingual-multi-singer-high-fidelity-singing-voice-synthesizer-trained-on-monolingual-singers-2309.12672"/></url>
<url><loc>https://scifaro.com/en/abs/deepfake-audio-as-a-data-augmentation-technique-for-training-automatic-speech-to-text-transcription-models-2309.12802</loc><lastmod>2026-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfake-audio-as-a-data-augmentation-technique-for-training-automatic-speech-to-text-transcription-models-2309.12802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfake-audio-as-a-data-augmentation-technique-for-training-automatic-speech-to-text-transcription-models-2309.12802"/></url>
<url><loc>https://scifaro.com/en/abs/does-my-dog-speak-like-me-the-acoustic-correlation-between-pet-dogs-and-their-human-owners-2309.13085</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-my-dog-speak-like-me-the-acoustic-correlation-between-pet-dogs-and-their-human-owners-2309.13085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-my-dog-speak-like-me-the-acoustic-correlation-between-pet-dogs-and-their-human-owners-2309.13085"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lexical-analysis-of-dog-vocalizations-via-online-videos-2309.13086</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lexical-analysis-of-dog-vocalizations-via-online-videos-2309.13086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lexical-analysis-of-dog-vocalizations-via-online-videos-2309.13086"/></url>
<url><loc>https://scifaro.com/en/abs/invisible-watermarking-for-audio-generation-diffusion-models-2309.13166</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/invisible-watermarking-for-audio-generation-diffusion-models-2309.13166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/invisible-watermarking-for-audio-generation-diffusion-models-2309.13166"/></url>
<url><loc>https://scifaro.com/en/abs/two-vs-four-channel-sound-event-localization-and-detection-2309.13343</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-vs-four-channel-sound-event-localization-and-detection-2309.13343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-vs-four-channel-sound-event-localization-and-detection-2309.13343"/></url>
<url><loc>https://scifaro.com/en/abs/asca-less-audio-data-is-more-insightful-2309.13373</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asca-less-audio-data-is-more-insightful-2309.13373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asca-less-audio-data-is-more-insightful-2309.13373"/></url>
<url><loc>https://scifaro.com/en/abs/coco-nut-corpus-of-japanese-utterance-and-voice-characteristics-description-for-prompt-based-control-2309.13509</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coco-nut-corpus-of-japanese-utterance-and-voice-characteristics-description-for-prompt-based-control-2309.13509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coco-nut-corpus-of-japanese-utterance-and-voice-characteristics-description-for-prompt-based-control-2309.13509"/></url>
<url><loc>https://scifaro.com/en/abs/the-second-multi-channel-multi-party-meeting-transcription-challenge-m2met-2-0-a-benchmark-for-speaker-attributed-asr-2309.13573</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-second-multi-channel-multi-party-meeting-transcription-challenge-m2met-2-0-a-benchmark-for-speaker-attributed-asr-2309.13573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-second-multi-channel-multi-party-meeting-transcription-challenge-m2met-2-0-a-benchmark-for-speaker-attributed-asr-2309.13573"/></url>
<url><loc>https://scifaro.com/en/abs/hignn-tts-hierarchical-prosody-modeling-with-graph-neural-networks-for-expressive-long-form-tts-2309.13907</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hignn-tts-hierarchical-prosody-modeling-with-graph-neural-networks-for-expressive-long-form-tts-2309.13907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hignn-tts-hierarchical-prosody-modeling-with-graph-neural-networks-for-expressive-long-form-tts-2309.13907"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-emergency-vehicle-detection-using-mel-spectrograms-and-regular-expressions-2309.13920</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-emergency-vehicle-detection-using-mel-spectrograms-and-regular-expressions-2309.13920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-emergency-vehicle-detection-using-mel-spectrograms-and-regular-expressions-2309.13920"/></url>
<url><loc>https://scifaro.com/en/abs/audio-classification-with-dilated-convolution-with-learnable-spacings-2309.13972</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-classification-with-dilated-convolution-with-learnable-spacings-2309.13972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-classification-with-dilated-convolution-with-learnable-spacings-2309.13972"/></url>
<url><loc>https://scifaro.com/en/abs/voicelens-controllable-speaker-generation-and-editing-with-flow-2309.14094</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicelens-controllable-speaker-generation-and-editing-with-flow-2309.14094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicelens-controllable-speaker-generation-and-editing-with-flow-2309.14094"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relation-between-internal-language-model-and-sequence-discriminative-training-for-neural-transducers-2309.14130</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relation-between-internal-language-model-and-sequence-discriminative-training-for-neural-transducers-2309.14130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relation-between-internal-language-model-and-sequence-discriminative-training-for-neural-transducers-2309.14130"/></url>
<url><loc>https://scifaro.com/en/abs/multi-domain-adaptation-by-self-supervised-learning-for-speaker-verification-2309.14149</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-domain-adaptation-by-self-supervised-learning-for-speaker-verification-2309.14149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-domain-adaptation-by-self-supervised-learning-for-speaker-verification-2309.14149"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-distribution-alignment-in-multi-genre-speaker-recognition-2309.14158</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-distribution-alignment-in-multi-genre-speaker-recognition-2309.14158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-distribution-alignment-in-multi-genre-speaker-recognition-2309.14158"/></url>
<url><loc>https://scifaro.com/en/abs/towards-using-cough-for-respiratory-disease-diagnosis-by-leveraging-artificial-intelligence-a-survey-2309.14383</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-using-cough-for-respiratory-disease-diagnosis-by-leveraging-artificial-intelligence-a-survey-2309.14383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-using-cough-for-respiratory-disease-diagnosis-by-leveraging-artificial-intelligence-a-survey-2309.14383"/></url>
<url><loc>https://scifaro.com/en/abs/joint-audio-and-speech-understanding-2309.14405</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-audio-and-speech-understanding-2309.14405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-audio-and-speech-understanding-2309.14405"/></url>
<url><loc>https://scifaro.com/en/abs/speech-audio-synthesis-from-tagged-mri-and-non-negative-matrix-factorization-via-plastic-transformer-2309.14586</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-audio-synthesis-from-tagged-mri-and-non-negative-matrix-factorization-via-plastic-transformer-2309.14586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-audio-synthesis-from-tagged-mri-and-non-negative-matrix-factorization-via-plastic-transformer-2309.14586"/></url>
<url><loc>https://scifaro.com/en/abs/emphasized-non-target-speaker-knowledge-in-knowledge-distillation-for-automatic-speaker-verification-2309.14838</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emphasized-non-target-speaker-knowledge-in-knowledge-distillation-for-automatic-speaker-verification-2309.14838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emphasized-non-target-speaker-knowledge-in-knowledge-distillation-for-automatic-speaker-verification-2309.14838"/></url>
<url><loc>https://scifaro.com/en/abs/synthia-s-melody-a-benchmark-framework-for-unsupervised-domain-adaptation-in-audio-2309.15024</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthia-s-melody-a-benchmark-framework-for-unsupervised-domain-adaptation-in-audio-2309.15024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthia-s-melody-a-benchmark-framework-for-unsupervised-domain-adaptation-in-audio-2309.15024"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-minimal-supervision-all-using-diffusion-models-2309.15512</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-minimal-supervision-all-using-diffusion-models-2309.15512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-speech-synthesis-with-minimal-supervision-all-using-diffusion-models-2309.15512"/></url>
<url><loc>https://scifaro.com/en/abs/speech-collage-code-switched-audio-generation-by-collaging-monolingual-corpora-2309.15674</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-collage-code-switched-audio-generation-by-collaging-monolingual-corpora-2309.15674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-collage-code-switched-audio-generation-by-collaging-monolingual-corpora-2309.15674"/></url>
<url><loc>https://scifaro.com/en/abs/neural-acoustic-context-field-rendering-realistic-room-impulse-response-with-neural-fields-2309.15977</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-acoustic-context-field-rendering-realistic-room-impulse-response-with-neural-fields-2309.15977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-acoustic-context-field-rendering-realistic-room-impulse-response-with-neural-fields-2309.15977"/></url>
<url><loc>https://scifaro.com/en/abs/lae-st-moe-boosted-language-aware-encoder-using-speech-translation-auxiliary-task-for-e2e-code-switching-asr-2309.16178</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lae-st-moe-boosted-language-aware-encoder-using-speech-translation-auxiliary-task-for-e2e-code-switching-asr-2309.16178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lae-st-moe-boosted-language-aware-encoder-using-speech-translation-auxiliary-task-for-e2e-code-switching-asr-2309.16178"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-proximity-alignment-towards-human-perception-consistent-audio-tagging-by-aligning-with-label-text-description-2309.16265</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-proximity-alignment-towards-human-perception-consistent-audio-tagging-by-aligning-with-label-text-description-2309.16265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-proximity-alignment-towards-human-perception-consistent-audio-tagging-by-aligning-with-label-text-description-2309.16265"/></url>
<url><loc>https://scifaro.com/en/abs/nomad-unsupervised-learning-of-perceptual-embeddings-for-speech-enhancement-and-non-matching-reference-audio-quality-assessment-2309.16284</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nomad-unsupervised-learning-of-perceptual-embeddings-for-speech-enhancement-and-non-matching-reference-audio-quality-assessment-2309.16284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nomad-unsupervised-learning-of-perceptual-embeddings-for-speech-enhancement-and-non-matching-reference-audio-quality-assessment-2309.16284"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-performance-difficulty-from-piano-sheet-music-images-2309.16287</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-performance-difficulty-from-piano-sheet-music-images-2309.16287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-performance-difficulty-from-piano-sheet-music-images-2309.16287"/></url>
<url><loc>https://scifaro.com/en/abs/bringing-the-discussion-of-minima-sharpness-to-the-audio-domain-a-filter-normalised-evaluation-for-acoustic-scene-classification-2309.16369</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bringing-the-discussion-of-minima-sharpness-to-the-audio-domain-a-filter-normalised-evaluation-for-acoustic-scene-classification-2309.16369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bringing-the-discussion-of-minima-sharpness-to-the-audio-domain-a-filter-normalised-evaluation-for-acoustic-scene-classification-2309.16369"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-supervised-training-of-audio-transformers-for-music-representation-learning-2309.16418</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-supervised-training-of-audio-transformers-for-music-representation-learning-2309.16418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-supervised-training-of-audio-transformers-for-music-representation-learning-2309.16418"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speaker-verification-via-joint-cross-attention-2309.16569</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speaker-verification-via-joint-cross-attention-2309.16569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speaker-verification-via-joint-cross-attention-2309.16569"/></url>
<url><loc>https://scifaro.com/en/abs/reflow-tts-a-rectified-flow-model-for-high-fidelity-text-to-speech-2309.17056</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reflow-tts-a-rectified-flow-model-for-high-fidelity-text-to-speech-2309.17056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reflow-tts-a-rectified-flow-model-for-high-fidelity-text-to-speech-2309.17056"/></url>
<url><loc>https://scifaro.com/en/abs/rtfs-net-recurrent-time-frequency-modelling-for-efficient-audio-visual-speech-separation-2309.17189</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rtfs-net-recurrent-time-frequency-modelling-for-efficient-audio-visual-speech-separation-2309.17189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rtfs-net-recurrent-time-frequency-modelling-for-efficient-audio-visual-speech-separation-2309.17189"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-captioning-models-with-fine-grained-audio-features-text-embedding-supervision-and-llm-mix-up-augmentation-2309.17352</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-captioning-models-with-fine-grained-audio-features-text-embedding-supervision-and-llm-mix-up-augmentation-2309.17352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-captioning-models-with-fine-grained-audio-features-text-embedding-supervision-and-llm-mix-up-augmentation-2309.17352"/></url>
<url><loc>https://scifaro.com/en/abs/fewer-token-neural-speech-codec-with-time-invariant-codes-2310.00014</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fewer-token-neural-speech-codec-with-time-invariant-codes-2310.00014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fewer-token-neural-speech-codec-with-time-invariant-codes-2310.00014"/></url>
<url><loc>https://scifaro.com/en/abs/gass-generalizing-audio-source-separation-with-large-scale-data-2310.00140</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gass-generalizing-audio-source-separation-with-large-scale-data-2310.00140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gass-generalizing-audio-source-separation-with-large-scale-data-2310.00140"/></url>
<url><loc>https://scifaro.com/en/abs/active-learning-based-fine-tuning-framework-for-speech-emotion-recognition-2310.00283</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-learning-based-fine-tuning-framework-for-speech-emotion-recognition-2310.00283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-learning-based-fine-tuning-framework-for-speech-emotion-recognition-2310.00283"/></url>
<url><loc>https://scifaro.com/en/abs/pianist-identification-using-convolutional-neural-networks-2310.00699</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pianist-identification-using-convolutional-neural-networks-2310.00699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pianist-identification-using-convolutional-neural-networks-2310.00699"/></url>
<url><loc>https://scifaro.com/en/abs/uniaudio-an-audio-foundation-model-toward-universal-audio-generation-2310.00704</loc><lastmod>2024-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniaudio-an-audio-foundation-model-toward-universal-audio-generation-2310.00704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniaudio-an-audio-foundation-model-toward-universal-audio-generation-2310.00704"/></url>
<url><loc>https://scifaro.com/en/abs/f0-analysis-of-ghanaian-pop-singing-reveals-progressive-alignment-with-equal-temperament-over-the-past-three-decades-a-case-study-2310.00870</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f0-analysis-of-ghanaian-pop-singing-reveals-progressive-alignment-with-equal-temperament-over-the-past-three-decades-a-case-study-2310.00870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f0-analysis-of-ghanaian-pop-singing-reveals-progressive-alignment-with-equal-temperament-over-the-past-three-decades-a-case-study-2310.00870"/></url>
<url><loc>https://scifaro.com/en/abs/usee-unified-speech-enhancement-and-editing-with-conditional-diffusion-models-2310.00900</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usee-unified-speech-enhancement-and-editing-with-conditional-diffusion-models-2310.00900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usee-unified-speech-enhancement-and-editing-with-conditional-diffusion-models-2310.00900"/></url>
<url><loc>https://scifaro.com/en/abs/diffar-denoising-diffusion-autoregressive-model-for-raw-speech-waveform-generation-2310.01381</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffar-denoising-diffusion-autoregressive-model-for-raw-speech-waveform-generation-2310.01381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffar-denoising-diffusion-autoregressive-model-for-raw-speech-waveform-generation-2310.01381"/></url>
<url><loc>https://scifaro.com/en/abs/mel-band-roformer-for-music-source-separation-2310.01809</loc><lastmod>2023-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-band-roformer-for-music-source-separation-2310.01809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-band-roformer-for-music-source-separation-2310.01809"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-audios-using-acoustic-properties-for-emotion-representation-2310.02298</loc><lastmod>2023-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-audios-using-acoustic-properties-for-emotion-representation-2310.02298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-audios-using-acoustic-properties-for-emotion-representation-2310.02298"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-interpretable-representation-of-speaker-identity-via-perceptual-voice-qualities-2310.02497</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-interpretable-representation-of-speaker-identity-via-perceptual-voice-qualities-2310.02497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-interpretable-representation-of-speaker-identity-via-perceptual-voice-qualities-2310.02497"/></url>
<url><loc>https://scifaro.com/en/abs/shaping-the-epochal-individuality-and-generality-the-temporal-dynamics-of-uncertainty-and-prediction-error-in-musical-improvisation-2310.02518</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shaping-the-epochal-individuality-and-generality-the-temporal-dynamics-of-uncertainty-and-prediction-error-in-musical-improvisation-2310.02518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shaping-the-epochal-individuality-and-generality-the-temporal-dynamics-of-uncertainty-and-prediction-error-in-musical-improvisation-2310.02518"/></url>
<url><loc>https://scifaro.com/en/abs/improving-severity-preservation-of-healthy-to-pathological-voice-conversion-with-global-style-tokens-2310.02570</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-severity-preservation-of-healthy-to-pathological-voice-conversion-with-global-style-tokens-2310.02570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-severity-preservation-of-healthy-to-pathological-voice-conversion-with-global-style-tokens-2310.02570"/></url>
<url><loc>https://scifaro.com/en/abs/ba-moe-boundary-aware-mixture-of-experts-adapter-for-code-switching-speech-recognition-2310.02629</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ba-moe-boundary-aware-mixture-of-experts-adapter-for-code-switching-speech-recognition-2310.02629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ba-moe-boundary-aware-mixture-of-experts-adapter-for-code-switching-speech-recognition-2310.02629"/></url>
<url><loc>https://scifaro.com/en/abs/multi-resolution-hubert-multi-resolution-speech-self-supervised-learning-with-masked-unit-prediction-2310.02720</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-resolution-hubert-multi-resolution-speech-self-supervised-learning-with-masked-unit-prediction-2310.02720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-resolution-hubert-multi-resolution-speech-self-supervised-learning-with-masked-unit-prediction-2310.02720"/></url>
<url><loc>https://scifaro.com/en/abs/an-integrated-algorithm-for-robust-and-imperceptible-audio-adversarial-examples-2310.03349</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-integrated-algorithm-for-robust-and-imperceptible-audio-adversarial-examples-2310.03349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-integrated-algorithm-for-robust-and-imperceptible-audio-adversarial-examples-2310.03349"/></url>
<url><loc>https://scifaro.com/en/abs/deep-generative-models-of-music-expectation-2310.03500</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-generative-models-of-music-expectation-2310.03500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-generative-models-of-music-expectation-2310.03500"/></url>
<url><loc>https://scifaro.com/en/abs/securing-voice-biometrics-one-shot-learning-approach-for-audio-deepfake-detection-2310.03856</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/securing-voice-biometrics-one-shot-learning-approach-for-audio-deepfake-detection-2310.03856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/securing-voice-biometrics-one-shot-learning-approach-for-audio-deepfake-detection-2310.03856"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-models-as-masked-audio-video-learners-2310.03937</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-models-as-masked-audio-video-learners-2310.03937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-models-as-masked-audio-video-learners-2310.03937"/></url>
<url><loc>https://scifaro.com/en/abs/effuse-efficient-self-supervised-feature-fusion-for-e2e-asr-in-low-resource-and-multilingual-scenarios-2310.03938</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effuse-efficient-self-supervised-feature-fusion-for-e2e-asr-in-low-resource-and-multilingual-scenarios-2310.03938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effuse-efficient-self-supervised-feature-fusion-for-e2e-asr-in-low-resource-and-multilingual-scenarios-2310.03938"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-emotion-transfer-for-cross-lingual-speech-synthesis-2310.03963</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-emotion-transfer-for-cross-lingual-speech-synthesis-2310.03963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-emotion-transfer-for-cross-lingual-speech-synthesis-2310.03963"/></url>
<url><loc>https://scifaro.com/en/abs/hubertopic-enhancing-semantic-representation-of-hubert-through-self-supervision-utilizing-topic-model-2310.03975</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hubertopic-enhancing-semantic-representation-of-hubert-through-self-supervision-utilizing-topic-model-2310.03975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hubertopic-enhancing-semantic-representation-of-hubert-through-self-supervision-utilizing-topic-model-2310.03975"/></url>
<url><loc>https://scifaro.com/en/abs/layer-adapted-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2310.03992</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/layer-adapted-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2310.03992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/layer-adapted-implicit-distribution-alignment-networks-for-cross-corpus-speech-emotion-recognition-2310.03992"/></url>
<url><loc>https://scifaro.com/en/abs/u-style-cascading-u-nets-with-multi-level-speaker-and-style-modeling-for-zero-shot-voice-cloning-2310.04004</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-style-cascading-u-nets-with-multi-level-speaker-and-style-modeling-for-zero-shot-voice-cloning-2310.04004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-style-cascading-u-nets-with-multi-level-speaker-and-style-modeling-for-zero-shot-voice-cloning-2310.04004"/></url>
<url><loc>https://scifaro.com/en/abs/mbtfnet-multi-band-temporal-frequency-neural-network-for-singing-voice-enhancement-2310.04369</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mbtfnet-multi-band-temporal-frequency-neural-network-for-singing-voice-enhancement-2310.04369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mbtfnet-multi-band-temporal-frequency-neural-network-for-singing-voice-enhancement-2310.04369"/></url>
<url><loc>https://scifaro.com/en/abs/neural2speech-a-transfer-learning-framework-for-neural-driven-speech-reconstruction-2310.04644</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural2speech-a-transfer-learning-framework-for-neural-driven-speech-reconstruction-2310.04644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural2speech-a-transfer-learning-framework-for-neural-driven-speech-reconstruction-2310.04644"/></url>
<url><loc>https://scifaro.com/en/abs/lauragpt-listen-attend-understand-and-regenerate-audio-with-gpt-2310.04673</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lauragpt-listen-attend-understand-and-regenerate-audio-with-gpt-2310.04673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lauragpt-listen-attend-understand-and-regenerate-audio-with-gpt-2310.04673"/></url>
<url><loc>https://scifaro.com/en/abs/voiceextender-short-utterance-text-independent-speaker-verification-with-guided-diffusion-model-2310.04681</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceextender-short-utterance-text-independent-speaker-verification-with-guided-diffusion-model-2310.04681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceextender-short-utterance-text-independent-speaker-verification-with-guided-diffusion-model-2310.04681"/></url>
<url><loc>https://scifaro.com/en/abs/a-holistic-evaluation-of-piano-sound-quality-2310.04722</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-holistic-evaluation-of-piano-sound-quality-2310.04722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-holistic-evaluation-of-piano-sound-quality-2310.04722"/></url>
<url><loc>https://scifaro.com/en/abs/fm-tone-transfer-with-envelope-learning-2310.04811</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fm-tone-transfer-with-envelope-learning-2310.04811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fm-tone-transfer-with-envelope-learning-2310.04811"/></url>
<url><loc>https://scifaro.com/en/abs/sa-paraformer-non-autoregressive-end-to-end-speaker-attributed-asr-2310.04863</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-paraformer-non-autoregressive-end-to-end-speaker-attributed-asr-2310.04863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-paraformer-non-autoregressive-end-to-end-speaker-attributed-asr-2310.04863"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-transfer-learning-in-deep-learning-text-to-speech-models-on-a-few-shot-low-resource-customized-dataset-2310.04982</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-transfer-learning-in-deep-learning-text-to-speech-models-on-a-few-shot-low-resource-customized-dataset-2310.04982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-transfer-learning-in-deep-learning-text-to-speech-models-on-a-few-shot-low-resource-customized-dataset-2310.04982"/></url>
<url><loc>https://scifaro.com/en/abs/promptspeaker-speaker-generation-based-on-text-descriptions-2310.05001</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/promptspeaker-speaker-generation-based-on-text-descriptions-2310.05001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/promptspeaker-speaker-generation-based-on-text-descriptions-2310.05001"/></url>
<url><loc>https://scifaro.com/en/abs/salt-distinguishable-speaker-anonymization-through-latent-space-transformation-2310.05051</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salt-distinguishable-speaker-anonymization-through-latent-space-transformation-2310.05051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salt-distinguishable-speaker-anonymization-through-latent-space-transformation-2310.05051"/></url>
<url><loc>https://scifaro.com/en/abs/vits-based-singing-voice-conversion-system-with-dspgan-post-processing-for-svcc2023-2310.05118</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vits-based-singing-voice-conversion-system-with-dspgan-post-processing-for-svcc2023-2310.05118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vits-based-singing-voice-conversion-system-with-dspgan-post-processing-for-svcc2023-2310.05118"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-investigation-of-neural-replay-simulator-for-over-the-air-adversarial-perturbations-to-automatic-speaker-verification-2310.05354</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-investigation-of-neural-replay-simulator-for-over-the-air-adversarial-perturbations-to-automatic-speaker-verification-2310.05354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-investigation-of-neural-replay-simulator-for-over-the-air-adversarial-perturbations-to-automatic-speaker-verification-2310.05354"/></url>
<url><loc>https://scifaro.com/en/abs/advsv-an-over-the-air-adversarial-attack-dataset-for-speaker-verification-2310.05369</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advsv-an-over-the-air-adversarial-attack-dataset-for-speaker-verification-2310.05369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advsv-an-over-the-air-adversarial-attack-dataset-for-speaker-verification-2310.05369"/></url>
<url><loc>https://scifaro.com/en/abs/findings-of-the-2023-ml-superb-challenge-pre-training-and-evaluation-over-more-languages-and-beyond-2310.05513</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/findings-of-the-2023-ml-superb-challenge-pre-training-and-evaluation-over-more-languages-and-beyond-2310.05513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/findings-of-the-2023-ml-superb-challenge-pre-training-and-evaluation-over-more-languages-and-beyond-2310.05513"/></url>
<url><loc>https://scifaro.com/en/abs/audio-compression-assisted-feature-extraction-for-voice-replay-attack-detection-2310.05813</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-compression-assisted-feature-extraction-for-voice-replay-attack-detection-2310.05813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-compression-assisted-feature-extraction-for-voice-replay-attack-detection-2310.05813"/></url>
<url><loc>https://scifaro.com/en/abs/pre-trained-spatial-priors-on-multichannel-nmf-for-music-source-separation-2310.05821</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-trained-spatial-priors-on-multichannel-nmf-for-music-source-separation-2310.05821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-trained-spatial-priors-on-multichannel-nmf-for-music-source-separation-2310.05821"/></url>
<url><loc>https://scifaro.com/en/abs/jvnv-a-corpus-of-japanese-emotional-speech-with-verbal-content-and-nonverbal-expressions-2310.06072</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jvnv-a-corpus-of-japanese-emotional-speech-with-verbal-content-and-nonverbal-expressions-2310.06072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jvnv-a-corpus-of-japanese-emotional-speech-with-verbal-content-and-nonverbal-expressions-2310.06072"/></url>
<url><loc>https://scifaro.com/en/abs/on-time-domain-conformer-models-for-monaural-speech-separation-in-noisy-reverberant-acoustic-environments-2310.06125</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-time-domain-conformer-models-for-monaural-speech-separation-in-noisy-reverberant-acoustic-environments-2310.06125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-time-domain-conformer-models-for-monaural-speech-separation-in-noisy-reverberant-acoustic-environments-2310.06125"/></url>
<url><loc>https://scifaro.com/en/abs/an-experiment-on-an-automated-literature-survey-of-data-driven-speech-enhancement-methods-2310.06260</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experiment-on-an-automated-literature-survey-of-data-driven-speech-enhancement-methods-2310.06260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experiment-on-an-automated-literature-survey-of-data-driven-speech-enhancement-methods-2310.06260"/></url>
<url><loc>https://scifaro.com/en/abs/noisy-arcmix-additive-noisy-angular-margin-loss-combined-with-mixup-anomalous-sound-detection-2310.06364</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisy-arcmix-additive-noisy-angular-margin-loss-combined-with-mixup-anomalous-sound-detection-2310.06364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisy-arcmix-additive-noisy-angular-margin-loss-combined-with-mixup-anomalous-sound-detection-2310.06364"/></url>
<url><loc>https://scifaro.com/en/abs/topological-data-analysis-of-human-vowels-persistent-homologies-across-representation-spaces-2310.06508</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topological-data-analysis-of-human-vowels-persistent-homologies-across-representation-spaces-2310.06508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topological-data-analysis-of-human-vowels-persistent-homologies-across-representation-spaces-2310.06508"/></url>
<url><loc>https://scifaro.com/en/abs/autocycle-vc-towards-bottleneck-independent-zero-shot-cross-lingual-voice-conversion-2310.06546</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autocycle-vc-towards-bottleneck-independent-zero-shot-cross-lingual-voice-conversion-2310.06546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autocycle-vc-towards-bottleneck-independent-zero-shot-cross-lingual-voice-conversion-2310.06546"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-analysis-of-audiobooks-2310.06930</loc><lastmod>2025-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-analysis-of-audiobooks-2310.06930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-analysis-of-audiobooks-2310.06930"/></url>
<url><loc>https://scifaro.com/en/abs/neural-harmonium-an-interpretable-deep-structure-for-nonlinear-dynamic-system-identification-with-application-to-audio-processing-2310.07032</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-harmonium-an-interpretable-deep-structure-for-nonlinear-dynamic-system-identification-with-application-to-audio-processing-2310.07032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-harmonium-an-interpretable-deep-structure-for-nonlinear-dynamic-system-identification-with-application-to-audio-processing-2310.07032"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-model-fusion-for-end-to-end-speech-recognition-2310.07062</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-model-fusion-for-end-to-end-speech-recognition-2310.07062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-model-fusion-for-end-to-end-speech-recognition-2310.07062"/></url>
<url><loc>https://scifaro.com/en/abs/llark-a-multimodal-instruction-following-language-model-for-music-2310.07160</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llark-a-multimodal-instruction-following-language-model-for-music-2310.07160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llark-a-multimodal-instruction-following-language-model-for-music-2310.07160"/></url>
<url><loc>https://scifaro.com/en/abs/psychoacoustic-challenges-of-speech-enhancement-on-voip-platforms-2310.07161</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psychoacoustic-challenges-of-speech-enhancement-on-voip-platforms-2310.07161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psychoacoustic-challenges-of-speech-enhancement-on-voip-platforms-2310.07161"/></url>
<url><loc>https://scifaro.com/en/abs/vec-tok-speech-speech-vectorization-and-tokenization-for-neural-speech-generation-2310.07246</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vec-tok-speech-speech-vectorization-and-tokenization-for-neural-speech-generation-2310.07246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vec-tok-speech-speech-vectorization-and-tokenization-for-neural-speech-generation-2310.07246"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-expressivity-transfer-in-textless-speech-to-speech-translation-2310.07279</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-expressivity-transfer-in-textless-speech-to-speech-translation-2310.07279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-expressivity-transfer-in-textless-speech-to-speech-translation-2310.07279"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-time-and-note-duration-tokenizations-on-deep-learning-symbolic-music-modeling-2310.08497</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-time-and-note-duration-tokenizations-on-deep-learning-symbolic-music-modeling-2310.08497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-time-and-note-duration-tokenizations-on-deep-learning-symbolic-music-modeling-2310.08497"/></url>
<url><loc>https://scifaro.com/en/abs/compa-addressing-the-gap-in-compositional-reasoning-in-audio-language-models-2310.08753</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compa-addressing-the-gap-in-compositional-reasoning-in-audio-language-models-2310.08753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compa-addressing-the-gap-in-compositional-reasoning-in-audio-language-models-2310.08753"/></url>
<url><loc>https://scifaro.com/en/abs/dual-branch-knowledge-distillation-for-noise-robust-synthetic-speech-detection-2310.08869</loc><lastmod>2024-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-branch-knowledge-distillation-for-noise-robust-synthetic-speech-detection-2310.08869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-branch-knowledge-distillation-for-noise-robust-synthetic-speech-detection-2310.08869"/></url>
<url><loc>https://scifaro.com/en/abs/differential-evolution-algorithm-based-hyper-parameters-selection-of-convolutional-neural-network-for-speech-command-recognition-2310.08914</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differential-evolution-algorithm-based-hyper-parameters-selection-of-convolutional-neural-network-for-speech-command-recognition-2310.08914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differential-evolution-algorithm-based-hyper-parameters-selection-of-convolutional-neural-network-for-speech-command-recognition-2310.08914"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-autoencoder-with-id-constraint-for-unsupervised-anomalous-sound-detection-2310.08950</loc><lastmod>2023-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-autoencoder-with-id-constraint-for-unsupervised-anomalous-sound-detection-2310.08950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-autoencoder-with-id-constraint-for-unsupervised-anomalous-sound-detection-2310.08950"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-speech-enhancement-via-speech-token-generation-2310.08981</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-speech-enhancement-via-speech-token-generation-2310.08981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-speech-enhancement-via-speech-token-generation-2310.08981"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-test-time-adaptation-in-wild-acoustic-test-settings-2310.09505</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-test-time-adaptation-in-wild-acoustic-test-settings-2310.09505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-test-time-adaptation-in-wild-acoustic-test-settings-2310.09505"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-prediction-of-full-ocean-depth-ssp-by-hierarchical-lstm-an-experimental-result-2310.09522</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-prediction-of-full-ocean-depth-ssp-by-hierarchical-lstm-an-experimental-result-2310.09522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-prediction-of-full-ocean-depth-ssp-by-hierarchical-lstm-an-experimental-result-2310.09522"/></url>
<url><loc>https://scifaro.com/en/abs/selfvc-voice-conversion-with-iterative-refinement-using-self-transformations-2310.09653</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selfvc-voice-conversion-with-iterative-refinement-using-self-transformations-2310.09653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selfvc-voice-conversion-with-iterative-refinement-using-self-transformations-2310.09653"/></url>
<url><loc>https://scifaro.com/en/abs/cocoformer-a-controllable-feature-rich-polyphonic-music-generation-method-2310.09843</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cocoformer-a-controllable-feature-rich-polyphonic-music-generation-method-2310.09843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cocoformer-a-controllable-feature-rich-polyphonic-music-generation-method-2310.09843"/></url>
<url><loc>https://scifaro.com/en/abs/mertech-instrument-playing-technique-detection-using-self-supervised-pretrained-model-with-multi-task-finetuning-2310.09853</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mertech-instrument-playing-technique-detection-using-self-supervised-pretrained-model-with-multi-task-finetuning-2310.09853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mertech-instrument-playing-technique-detection-using-self-supervised-pretrained-model-with-multi-task-finetuning-2310.09853"/></url>
<url><loc>https://scifaro.com/en/abs/joint-music-and-language-attention-models-for-zero-shot-music-tagging-2310.10159</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-music-and-language-attention-models-for-zero-shot-music-tagging-2310.10159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-music-and-language-attention-models-for-zero-shot-music-tagging-2310.10159"/></url>
<url><loc>https://scifaro.com/en/abs/beatdance-a-beat-based-model-agnostic-contrastive-learning-framework-for-music-dance-retrieval-2310.10300</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beatdance-a-beat-based-model-agnostic-contrastive-learning-framework-for-music-dance-retrieval-2310.10300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beatdance-a-beat-based-model-agnostic-contrastive-learning-framework-for-music-dance-retrieval-2310.10300"/></url>
<url><loc>https://scifaro.com/en/abs/locselect-target-speaker-localization-with-an-auditory-selective-hearing-mechanism-2310.10497</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locselect-target-speaker-localization-with-an-auditory-selective-hearing-mechanism-2310.10497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locselect-target-speaker-localization-with-an-auditory-selective-hearing-mechanism-2310.10497"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-lead-sheet-generation-via-semantic-compression-2310.10772</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-lead-sheet-generation-via-semantic-compression-2310.10772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-lead-sheet-generation-via-semantic-compression-2310.10772"/></url>
<url><loc>https://scifaro.com/en/abs/a-high-fidelity-and-low-complexity-neural-audio-coding-2310.10992</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-high-fidelity-and-low-complexity-neural-audio-coding-2310.10992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-high-fidelity-and-low-complexity-neural-audio-coding-2310.10992"/></url>
<url><loc>https://scifaro.com/en/abs/lyricist-singer-entropy-affects-lyric-lyricist-classification-performance-2310.11035</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lyricist-singer-entropy-affects-lyric-lyricist-classification-performance-2310.11035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lyricist-singer-entropy-affects-lyric-lyricist-classification-performance-2310.11035"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-diverse-semantic-based-audio-pretrained-models-for-singing-voice-conversion-2310.11160</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-diverse-semantic-based-audio-pretrained-models-for-singing-voice-conversion-2310.11160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-diverse-semantic-based-audio-pretrained-models-for-singing-voice-conversion-2310.11160"/></url>
<url><loc>https://scifaro.com/en/abs/serenade-a-model-for-human-in-the-loop-automatic-chord-estimation-2310.11165</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serenade-a-model-for-human-in-the-loop-automatic-chord-estimation-2310.11165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serenade-a-model-for-human-in-the-loop-automatic-chord-estimation-2310.11165"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-noise-reduction-with-differentiable-signal-processing-2310.11364</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-noise-reduction-with-differentiable-signal-processing-2310.11364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-noise-reduction-with-differentiable-signal-processing-2310.11364"/></url>
<url><loc>https://scifaro.com/en/abs/robust-wake-up-word-detection-by-two-stage-multi-resolution-ensembles-2310.11379</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-wake-up-word-detection-by-two-stage-multi-resolution-ensembles-2310.11379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-wake-up-word-detection-by-two-stage-multi-resolution-ensembles-2310.11379"/></url>
<url><loc>https://scifaro.com/en/abs/echoscan-scanning-complex-room-geometries-via-acoustic-echoes-2310.11728</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echoscan-scanning-complex-room-geometries-via-acoustic-echoes-2310.11728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echoscan-scanning-complex-room-geometries-via-acoustic-echoes-2310.11728"/></url>
<url><loc>https://scifaro.com/en/abs/blind-estimation-of-audio-effects-using-an-auto-encoder-approach-and-differentiable-digital-signal-processing-2310.11781</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-estimation-of-audio-effects-using-an-auto-encoder-approach-and-differentiable-digital-signal-processing-2310.11781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-estimation-of-audio-effects-using-an-auto-encoder-approach-and-differentiable-digital-signal-processing-2310.11781"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-neural-network-for-acoustic-resonance-analysis-in-a-one-dimensional-acoustic-tube-2310.11804</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-neural-network-for-acoustic-resonance-analysis-in-a-one-dimensional-acoustic-tube-2310.11804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-neural-network-for-acoustic-resonance-analysis-in-a-one-dimensional-acoustic-tube-2310.11804"/></url>
<url><loc>https://scifaro.com/en/abs/clara-multilingual-contrastive-learning-for-audio-representation-acquisition-2310.11830</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clara-multilingual-contrastive-learning-for-audio-representation-acquisition-2310.11830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clara-multilingual-contrastive-learning-for-audio-representation-acquisition-2310.11830"/></url>
<url><loc>https://scifaro.com/en/abs/but-chime-7-system-description-2310.11921</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-chime-7-system-description-2310.11921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-chime-7-system-description-2310.11921"/></url>
<url><loc>https://scifaro.com/en/abs/take-the-atrain-introducing-an-interface-for-the-accessible-transcription-of-interviews-2310.11967</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/take-the-atrain-introducing-an-interface-for-the-accessible-transcription-of-interviews-2310.11967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/take-the-atrain-introducing-an-interface-for-the-accessible-transcription-of-interviews-2310.11967"/></url>
<url><loc>https://scifaro.com/en/abs/loop-copilot-conducting-ai-ensembles-for-music-generation-and-iterative-editing-2310.12404</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loop-copilot-conducting-ai-ensembles-for-music-generation-and-iterative-editing-2310.12404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loop-copilot-conducting-ai-ensembles-for-music-generation-and-iterative-editing-2310.12404"/></url>
<url><loc>https://scifaro.com/en/abs/energy-based-models-for-speech-synthesis-2310.12765</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/energy-based-models-for-speech-synthesis-2310.12765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/energy-based-models-for-speech-synthesis-2310.12765"/></url>
<url><loc>https://scifaro.com/en/abs/emodiarize-speaker-diarization-and-emotion-identification-from-speech-signals-using-convolutional-neural-networks-2310.12851</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emodiarize-speaker-diarization-and-emotion-identification-from-speech-signals-using-convolutional-neural-networks-2310.12851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emodiarize-speaker-diarization-and-emotion-identification-from-speech-signals-using-convolutional-neural-networks-2310.12851"/></url>
<url><loc>https://scifaro.com/en/abs/audio-editing-with-non-rigid-text-prompts-2310.12858</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-editing-with-non-rigid-text-prompts-2310.12858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-editing-with-non-rigid-text-prompts-2310.12858"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-quantification-of-bandgaps-in-acoustic-metamaterials-with-stochastic-geometric-defects-and-material-properties-2310.12869</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-quantification-of-bandgaps-in-acoustic-metamaterials-with-stochastic-geometric-defects-and-material-properties-2310.12869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-quantification-of-bandgaps-in-acoustic-metamaterials-with-stochastic-geometric-defects-and-material-properties-2310.12869"/></url>
<url><loc>https://scifaro.com/en/abs/powerset-multi-class-cross-entropy-loss-for-neural-speaker-diarization-2310.13025</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/powerset-multi-class-cross-entropy-loss-for-neural-speaker-diarization-2310.13025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/powerset-multi-class-cross-entropy-loss-for-neural-speaker-diarization-2310.13025"/></url>
<url><loc>https://scifaro.com/en/abs/salmonn-towards-generic-hearing-abilities-for-large-language-models-2310.13289</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salmonn-towards-generic-hearing-abilities-for-large-language-models-2310.13289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salmonn-towards-generic-hearing-abilities-for-large-language-models-2310.13289"/></url>
<url><loc>https://scifaro.com/en/abs/music-augmentation-and-denoising-for-peak-based-audio-fingerprinting-2310.13388</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-augmentation-and-denoising-for-peak-based-audio-fingerprinting-2310.13388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-augmentation-and-denoising-for-peak-based-audio-fingerprinting-2310.13388"/></url>
<url><loc>https://scifaro.com/en/abs/definition-independent-formalization-of-soundscapes-towards-a-formal-methodology-2310.13404</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/definition-independent-formalization-of-soundscapes-towards-a-formal-methodology-2310.13404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/definition-independent-formalization-of-soundscapes-towards-a-formal-methodology-2310.13404"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-triplet-loss-training-with-curriculum-augmentation-for-audio-visual-retrieval-2310.13451</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-triplet-loss-training-with-curriculum-augmentation-for-audio-visual-retrieval-2310.13451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-triplet-loss-training-with-curriculum-augmentation-for-audio-visual-retrieval-2310.13451"/></url>
<url><loc>https://scifaro.com/en/abs/multi-label-open-set-audio-classification-2310.13759</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-label-open-set-audio-classification-2310.13759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-label-open-set-audio-classification-2310.13759"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-convolutional-neural-networks-to-generate-a-head-related-impulse-response-from-one-direction-to-another-2310.14018</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-convolutional-neural-networks-to-generate-a-head-related-impulse-response-from-one-direction-to-another-2310.14018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-convolutional-neural-networks-to-generate-a-head-related-impulse-response-from-one-direction-to-another-2310.14018"/></url>
<url><loc>https://scifaro.com/en/abs/fast-diffusion-gan-model-for-symbolic-music-generation-controlled-by-emotions-2310.14040</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-diffusion-gan-model-for-symbolic-music-generation-controlled-by-emotions-2310.14040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-diffusion-gan-model-for-symbolic-music-generation-controlled-by-emotions-2310.14040"/></url>
<url><loc>https://scifaro.com/en/abs/composer-style-specific-symbolic-music-generation-using-vector-quantized-discrete-diffusion-models-2310.14044</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composer-style-specific-symbolic-music-generation-using-vector-quantized-discrete-diffusion-models-2310.14044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composer-style-specific-symbolic-music-generation-using-vector-quantized-discrete-diffusion-models-2310.14044"/></url>
<url><loc>https://scifaro.com/en/abs/first-shot-unsupervised-anomalous-sound-detection-with-unknown-anomalies-estimated-by-metadata-assisted-audio-generation-2310.14173</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/first-shot-unsupervised-anomalous-sound-detection-with-unknown-anomalies-estimated-by-metadata-assisted-audio-generation-2310.14173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/first-shot-unsupervised-anomalous-sound-detection-with-unknown-anomalies-estimated-by-metadata-assisted-audio-generation-2310.14173"/></url>
<url><loc>https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-audio-textual-cross-modal-contextual-representation-2310.14278</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-audio-textual-cross-modal-contextual-representation-2310.14278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conversational-speech-recognition-by-learning-audio-textual-cross-modal-contextual-representation-2310.14278"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-bpe-for-speech-generation-with-discrete-tokens-2310.14580</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-bpe-for-speech-generation-with-discrete-tokens-2310.14580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-bpe-for-speech-generation-with-discrete-tokens-2310.14580"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-transfer-learning-method-utilizing-acoustic-and-vibration-signals-for-rotating-machinery-fault-diagnosis-2310.14796</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-transfer-learning-method-utilizing-acoustic-and-vibration-signals-for-rotating-machinery-fault-diagnosis-2310.14796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-transfer-learning-method-utilizing-acoustic-and-vibration-signals-for-rotating-machinery-fault-diagnosis-2310.14796"/></url>
<url><loc>https://scifaro.com/en/abs/8-8-4-formalizing-time-units-to-handle-symbolic-music-durations-2310.14952</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/8-8-4-formalizing-time-units-to-handle-symbolic-music-durations-2310.14952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/8-8-4-formalizing-time-units-to-handle-symbolic-music-durations-2310.14952"/></url>
<url><loc>https://scifaro.com/en/abs/key-frame-mechanism-for-efficient-conformer-based-end-to-end-speech-recognition-2310.14954</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/key-frame-mechanism-for-efficient-conformer-based-end-to-end-speech-recognition-2310.14954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/key-frame-mechanism-for-efficient-conformer-based-end-to-end-speech-recognition-2310.14954"/></url>
<url><loc>https://scifaro.com/en/abs/novel-view-acoustic-synthesis-from-3d-reconstructed-rooms-2310.15130</loc><lastmod>2024-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-view-acoustic-synthesis-from-3d-reconstructed-rooms-2310.15130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-view-acoustic-synthesis-from-3d-reconstructed-rooms-2310.15130"/></url>
<url><loc>https://scifaro.com/en/abs/syncfusion-multimodal-onset-synchronized-video-to-audio-foley-synthesis-2310.15247</loc><lastmod>2023-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syncfusion-multimodal-onset-synchronized-video-to-audio-foley-synthesis-2310.15247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syncfusion-multimodal-onset-synchronized-video-to-audio-foley-synthesis-2310.15247"/></url>
<url><loc>https://scifaro.com/en/abs/modality-dropout-for-multimodal-device-directed-speech-detection-using-verbal-and-non-verbal-features-2310.15261</loc><lastmod>2023-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modality-dropout-for-multimodal-device-directed-speech-detection-using-verbal-and-non-verbal-features-2310.15261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modality-dropout-for-multimodal-device-directed-speech-detection-using-verbal-and-non-verbal-features-2310.15261"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-convolutional-neural-networks-as-efficient-pre-trained-audio-models-2310.15648</loc><lastmod>2023-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-convolutional-neural-networks-as-efficient-pre-trained-audio-models-2310.15648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-convolutional-neural-networks-as-efficient-pre-trained-audio-models-2310.15648"/></url>
<url><loc>https://scifaro.com/en/abs/cdsd-chinese-dysarthria-speech-database-2310.15930</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cdsd-chinese-dysarthria-speech-database-2310.15930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cdsd-chinese-dysarthria-speech-database-2310.15930"/></url>
<url><loc>https://scifaro.com/en/abs/complex-image-generation-swintransformer-network-for-audio-denoising-2310.16109</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-image-generation-swintransformer-network-for-audio-denoising-2310.16109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-image-generation-swintransformer-network-for-audio-denoising-2310.16109"/></url>
<url><loc>https://scifaro.com/en/abs/towards-streaming-speech-to-avatar-synthesis-2310.16287</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-streaming-speech-to-avatar-synthesis-2310.16287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-streaming-speech-to-avatar-synthesis-2310.16287"/></url>
<url><loc>https://scifaro.com/en/abs/structured-multi-track-accompaniment-arrangement-via-style-prior-modelling-2310.16334</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structured-multi-track-accompaniment-arrangement-via-style-prior-modelling-2310.16334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structured-multi-track-accompaniment-arrangement-via-style-prior-modelling-2310.16334"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-processing-neural-network-architecture-for-hearing-loss-compensation-2310.16550</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-processing-neural-network-architecture-for-hearing-loss-compensation-2310.16550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-processing-neural-network-architecture-for-hearing-loss-compensation-2310.16550"/></url>
<url><loc>https://scifaro.com/en/abs/learning-repeatable-speech-embeddings-using-an-intra-class-correlation-regularizer-2310.17049</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-repeatable-speech-embeddings-using-an-intra-class-correlation-regularizer-2310.17049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-repeatable-speech-embeddings-using-an-intra-class-correlation-regularizer-2310.17049"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-generation-of-artificial-speaker-embeddings-through-discovery-of-principal-directions-2310.17502</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-generation-of-artificial-speaker-embeddings-through-discovery-of-principal-directions-2310.17502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-generation-of-artificial-speaker-embeddings-through-discovery-of-principal-directions-2310.17502"/></url>
<url><loc>https://scifaro.com/en/abs/developing-a-multilingual-dataset-and-evaluation-metrics-for-code-switching-a-focus-on-hong-kong-s-polylingual-dynamics-2310.17953</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-a-multilingual-dataset-and-evaluation-metrics-for-code-switching-a-focus-on-hong-kong-s-polylingual-dynamics-2310.17953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-a-multilingual-dataset-and-evaluation-metrics-for-code-switching-a-focus-on-hong-kong-s-polylingual-dynamics-2310.17953"/></url>
<url><loc>https://scifaro.com/en/abs/style-description-based-text-to-speech-with-conditional-prosodic-layer-normalization-based-diffusion-gan-2310.18169</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/style-description-based-text-to-speech-with-conditional-prosodic-layer-normalization-based-diffusion-gan-2310.18169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/style-description-based-text-to-speech-with-conditional-prosodic-layer-normalization-based-diffusion-gan-2310.18169"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-emotional-landscape-of-music-an-analysis-of-valence-trends-and-genre-variations-in-spotify-music-data-2310.19052</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-emotional-landscape-of-music-an-analysis-of-valence-trends-and-genre-variations-in-spotify-music-data-2310.19052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-emotional-landscape-of-music-an-analysis-of-valence-trends-and-genre-variations-in-spotify-music-data-2310.19052"/></url>
<url><loc>https://scifaro.com/en/abs/feature-aggregation-in-joint-sound-classification-and-localization-neural-networks-2310.19063</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-aggregation-in-joint-sound-classification-and-localization-neural-networks-2310.19063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-aggregation-in-joint-sound-classification-and-localization-neural-networks-2310.19063"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-analyzer-a-framework-to-industrialize-the-research-on-audio-forensics-2310.19081</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-analyzer-a-framework-to-industrialize-the-research-on-audio-forensics-2310.19081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-analyzer-a-framework-to-industrialize-the-research-on-audio-forensics-2310.19081"/></url>
<url><loc>https://scifaro.com/en/abs/jen-1-composer-a-unified-framework-for-high-fidelity-multi-track-music-generation-2310.19180</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jen-1-composer-a-unified-framework-for-high-fidelity-multi-track-music-generation-2310.19180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jen-1-composer-a-unified-framework-for-high-fidelity-multi-track-music-generation-2310.19180"/></url>
<url><loc>https://scifaro.com/en/abs/dpatd-dual-phase-audio-transformer-for-denoising-2310.19588</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpatd-dual-phase-audio-transformer-for-denoising-2310.19588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpatd-dual-phase-audio-transformer-for-denoising-2310.19588"/></url>
<url><loc>https://scifaro.com/en/abs/dcht-deep-complex-hybrid-transformer-for-speech-enhancement-2310.19602</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcht-deep-complex-hybrid-transformer-for-speech-enhancement-2310.19602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcht-deep-complex-hybrid-transformer-for-speech-enhancement-2310.19602"/></url>
<url><loc>https://scifaro.com/en/abs/musical-form-generation-2310.19842</loc><lastmod>2023-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-form-generation-2310.19842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-form-generation-2310.19842"/></url>
<url><loc>https://scifaro.com/en/abs/lavss-location-guided-audio-visual-spatial-audio-separation-2310.20446</loc><lastmod>2023-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lavss-location-guided-audio-visual-spatial-audio-separation-2310.20446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lavss-location-guided-audio-visual-spatial-audio-separation-2310.20446"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-syllable-level-pronunciation-stress-with-a-self-attention-model-2311.00301</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-syllable-level-pronunciation-stress-with-a-self-attention-model-2311.00301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-syllable-level-pronunciation-stress-with-a-self-attention-model-2311.00301"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-hearing-programming-acoustic-scenes-with-binaural-hearables-2311.00320</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-hearing-programming-acoustic-scenes-with-binaural-hearables-2311.00320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-hearing-programming-acoustic-scenes-with-binaural-hearables-2311.00320"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speaker-recognition-do-not-learn-supra-segmental-temporal-features-2311.00489</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speaker-recognition-do-not-learn-supra-segmental-temporal-features-2311.00489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speaker-recognition-do-not-learn-supra-segmental-temporal-features-2311.00489"/></url>
<url><loc>https://scifaro.com/en/abs/active-noise-control-portable-device-design-2311.00535</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-noise-control-portable-device-design-2311.00535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-noise-control-portable-device-design-2311.00535"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-music-production-with-diffusion-models-and-guidance-gradients-2311.00613</loc><lastmod>2023-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-music-production-with-diffusion-models-and-guidance-gradients-2311.00613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-music-production-with-diffusion-models-and-guidance-gradients-2311.00613"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-self-supervised-deep-representations-for-eeg-based-auditory-attention-decoding-2311.00814</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-self-supervised-deep-representations-for-eeg-based-auditory-attention-decoding-2311.00814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-self-supervised-deep-representations-for-eeg-based-auditory-attention-decoding-2311.00814"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-real-time-voice-conversion-on-cpu-2311.00873</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-real-time-voice-conversion-on-cpu-2311.00873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-real-time-voice-conversion-on-cpu-2311.00873"/></url>
<url><loc>https://scifaro.com/en/abs/in-context-prompt-editing-for-conditional-audio-generation-2311.00895</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-context-prompt-editing-for-conditional-audio-generation-2311.00895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-context-prompt-editing-for-conditional-audio-generation-2311.00895"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-open-prompt-challenge-in-conditional-audio-generation-2311.00897</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-open-prompt-challenge-in-conditional-audio-generation-2311.00897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-open-prompt-challenge-in-conditional-audio-generation-2311.00897"/></url>
<url><loc>https://scifaro.com/en/abs/e3-tts-easy-end-to-end-diffusion-based-text-to-speech-2311.00945</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e3-tts-easy-end-to-end-diffusion-based-text-to-speech-2311.00945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e3-tts-easy-end-to-end-diffusion-based-text-to-speech-2311.00945"/></url>
<url><loc>https://scifaro.com/en/abs/video2music-suitable-music-generation-from-videos-using-an-affective-multimodal-transformer-model-2311.00968</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video2music-suitable-music-generation-from-videos-using-an-affective-multimodal-transformer-model-2311.00968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video2music-suitable-music-generation-from-videos-using-an-affective-multimodal-transformer-model-2311.00968"/></url>
<url><loc>https://scifaro.com/en/abs/atgnn-audio-tagging-graph-neural-network-2311.01526</loc><lastmod>2023-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atgnn-audio-tagging-graph-neural-network-2311.01526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atgnn-audio-tagging-graph-neural-network-2311.01526"/></url>
<url><loc>https://scifaro.com/en/abs/flap-fast-language-audio-pre-training-2311.01615</loc><lastmod>2023-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flap-fast-language-audio-pre-training-2311.01615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flap-fast-language-audio-pre-training-2311.01615"/></url>
<url><loc>https://scifaro.com/en/abs/acousto-optic-reconstruction-of-exterior-sound-field-based-on-concentric-circle-sampling-with-circular-harmonic-expansion-2311.01715</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acousto-optic-reconstruction-of-exterior-sound-field-based-on-concentric-circle-sampling-with-circular-harmonic-expansion-2311.01715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acousto-optic-reconstruction-of-exterior-sound-field-based-on-concentric-circle-sampling-with-circular-harmonic-expansion-2311.01715"/></url>
<url><loc>https://scifaro.com/en/abs/filobass-a-dataset-and-corpus-based-study-of-jazz-basslines-2311.02023</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filobass-a-dataset-and-corpus-based-study-of-jazz-basslines-2311.02023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filobass-a-dataset-and-corpus-based-study-of-jazz-basslines-2311.02023"/></url>
<url><loc>https://scifaro.com/en/abs/design-of-rubble-analyzer-probe-using-ml-for-earthquake-2311.02087</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-of-rubble-analyzer-probe-using-ml-for-earthquake-2311.02087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-of-rubble-analyzer-probe-using-ml-for-earthquake-2311.02087"/></url>
<url><loc>https://scifaro.com/en/abs/tacnet-temporal-audio-source-counting-network-2311.02369</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tacnet-temporal-audio-source-counting-network-2311.02369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tacnet-temporal-audio-source-counting-network-2311.02369"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-zero-shot-audio-to-intent-classification-2311.02482</loc><lastmod>2023-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-zero-shot-audio-to-intent-classification-2311.02482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-zero-shot-audio-to-intent-classification-2311.02482"/></url>
<url><loc>https://scifaro.com/en/abs/yet-another-generative-model-for-room-impulse-response-estimation-2311.02581</loc><lastmod>2023-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yet-another-generative-model-for-room-impulse-response-estimation-2311.02581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yet-another-generative-model-for-room-impulse-response-estimation-2311.02581"/></url>
<url><loc>https://scifaro.com/en/abs/attention-or-convolution-transformer-encoders-in-audio-language-models-for-inference-efficiency-2311.02772</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-or-convolution-transformer-encoders-in-audio-language-models-for-inference-efficiency-2311.02772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-or-convolution-transformer-encoders-in-audio-language-models-for-inference-efficiency-2311.02772"/></url>
<url><loc>https://scifaro.com/en/abs/a-foundation-model-for-music-informatics-2311.03318</loc><lastmod>2023-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-foundation-model-for-music-informatics-2311.03318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-foundation-model-for-music-informatics-2311.03318"/></url>
<url><loc>https://scifaro.com/en/abs/mfaan-unveiling-audio-deepfakes-with-a-multi-feature-authenticity-network-2311.03509</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfaan-unveiling-audio-deepfakes-with-a-multi-feature-authenticity-network-2311.03509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfaan-unveiling-audio-deepfakes-with-a-multi-feature-authenticity-network-2311.03509"/></url>
<url><loc>https://scifaro.com/en/abs/soundcam-a-dataset-for-finding-humans-using-room-acoustics-2311.03517</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundcam-a-dataset-for-finding-humans-using-room-acoustics-2311.03517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundcam-a-dataset-for-finding-humans-using-room-acoustics-2311.03517"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-latent-spaces-of-tonal-music-using-variational-autoencoders-2311.03621</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-latent-spaces-of-tonal-music-using-variational-autoencoders-2311.03621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-latent-spaces-of-tonal-music-using-variational-autoencoders-2311.03621"/></url>
<url><loc>https://scifaro.com/en/abs/improved-child-text-to-speech-synthesis-through-fastpitch-based-transfer-learning-2311.04313</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-child-text-to-speech-synthesis-through-fastpitch-based-transfer-learning-2311.04313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-child-text-to-speech-synthesis-through-fastpitch-based-transfer-learning-2311.04313"/></url>
<url><loc>https://scifaro.com/en/abs/soundbay-deep-learning-framework-for-marine-mammals-and-bioacoustic-research-2311.04343</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundbay-deep-learning-framework-for-marine-mammals-and-bioacoustic-research-2311.04343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundbay-deep-learning-framework-for-marine-mammals-and-bioacoustic-research-2311.04343"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-in-focus-enhancing-stuttered-speech-classification-with-encoder-layer-optimization-2311.05203</loc><lastmod>2023-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-in-focus-enhancing-stuttered-speech-classification-with-encoder-layer-optimization-2311.05203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-in-focus-enhancing-stuttered-speech-classification-with-encoder-layer-optimization-2311.05203"/></url>
<url><loc>https://scifaro.com/en/abs/what-do-i-hear-generating-sounds-for-visuals-with-chatgpt-2311.05609</loc><lastmod>2023-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-do-i-hear-generating-sounds-for-visuals-with-chatgpt-2311.05609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-do-i-hear-generating-sounds-for-visuals-with-chatgpt-2311.05609"/></url>
<url><loc>https://scifaro.com/en/abs/the-aerosonicdb-ypad-0523-dataset-for-acoustic-detection-and-classification-of-aircraft-2311.06368</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-aerosonicdb-ypad-0523-dataset-for-acoustic-detection-and-classification-of-aircraft-2311.06368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-aerosonicdb-ypad-0523-dataset-for-acoustic-detection-and-classification-of-aircraft-2311.06368"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-fine-tuning-using-generated-respiratory-sound-to-address-class-imbalance-2311.06480</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-fine-tuning-using-generated-respiratory-sound-to-address-class-imbalance-2311.06480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-fine-tuning-using-generated-respiratory-sound-to-address-class-imbalance-2311.06480"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-to-detect-covid-19-coughs-with-incremental-addition-of-patient-coughs-to-healthy-people-s-cough-detection-models-2311.06707</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-to-detect-covid-19-coughs-with-incremental-addition-of-patient-coughs-to-healthy-people-s-cough-detection-models-2311.06707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-to-detect-covid-19-coughs-with-incremental-addition-of-patient-coughs-to-healthy-people-s-cough-detection-models-2311.06707"/></url>
<url><loc>https://scifaro.com/en/abs/decoupling-and-interacting-multi-task-learning-network-for-joint-speech-and-accent-recognition-2311.07062</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupling-and-interacting-multi-task-learning-network-for-joint-speech-and-accent-recognition-2311.07062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupling-and-interacting-multi-task-learning-network-for-joint-speech-and-accent-recognition-2311.07062"/></url>
<url><loc>https://scifaro.com/en/abs/music-controlnet-multiple-time-varying-controls-for-music-generation-2311.07069</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-controlnet-multiple-time-varying-controls-for-music-generation-2311.07069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-controlnet-multiple-time-varying-controls-for-music-generation-2311.07069"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-study-on-the-effectiveness-of-asr-representations-for-noise-robust-speech-emotion-recognition-2311.07093</loc><lastmod>2026-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-study-on-the-effectiveness-of-asr-representations-for-noise-robust-speech-emotion-recognition-2311.07093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-study-on-the-effectiveness-of-asr-representations-for-noise-robust-speech-emotion-recognition-2311.07093"/></url>
<url><loc>https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-sound-propagation-characteristics-under-ice-covered-and-range-dependent-marine-environment-in-the-arctic-2311.07175</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-sound-propagation-characteristics-under-ice-covered-and-range-dependent-marine-environment-in-the-arctic-2311.07175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-sound-propagation-characteristics-under-ice-covered-and-range-dependent-marine-environment-in-the-arctic-2311.07175"/></url>
<url><loc>https://scifaro.com/en/abs/spontts-modeling-and-transferring-spontaneous-style-for-tts-2311.07179</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spontts-modeling-and-transferring-spontaneous-style-for-tts-2311.07179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spontts-modeling-and-transferring-spontaneous-style-for-tts-2311.07179"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-bandwidth-extension-of-musical-signals-using-a-differentiable-harmonic-plus-noise-model-2311.07363</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-bandwidth-extension-of-musical-signals-using-a-differentiable-harmonic-plus-noise-model-2311.07363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-bandwidth-extension-of-musical-signals-using-a-differentiable-harmonic-plus-noise-model-2311.07363"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-musical-object-discovery-from-audio-2311.07534</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-musical-object-discovery-from-audio-2311.07534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-musical-object-discovery-from-audio-2311.07534"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-generative-model-for-visual-guided-binaural-stereo-generation-2311.07630</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-generative-model-for-visual-guided-binaural-stereo-generation-2311.07630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-generative-model-for-visual-guided-binaural-stereo-generation-2311.07630"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-pressure-matching-strategy-using-diffusion-adaptation-2311.07729</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-pressure-matching-strategy-using-diffusion-adaptation-2311.07729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-pressure-matching-strategy-using-diffusion-adaptation-2311.07729"/></url>
<url><loc>https://scifaro.com/en/abs/parrot-trained-adversarial-examples-pushing-the-practicality-of-black-box-audio-attacks-against-speaker-recognition-models-2311.07780</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parrot-trained-adversarial-examples-pushing-the-practicality-of-black-box-audio-attacks-against-speaker-recognition-models-2311.07780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parrot-trained-adversarial-examples-pushing-the-practicality-of-black-box-audio-attacks-against-speaker-recognition-models-2311.07780"/></url>
<url><loc>https://scifaro.com/en/abs/dqr-tts-semi-supervised-text-to-speech-synthesis-with-dynamic-quantized-representation-2311.07965</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dqr-tts-semi-supervised-text-to-speech-synthesis-with-dynamic-quantized-representation-2311.07965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dqr-tts-semi-supervised-text-to-speech-synthesis-with-dynamic-quantized-representation-2311.07965"/></url>
<url><loc>https://scifaro.com/en/abs/reimagining-speech-a-scoping-review-of-deep-learning-powered-voice-conversion-2311.08104</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reimagining-speech-a-scoping-review-of-deep-learning-powered-voice-conversion-2311.08104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reimagining-speech-a-scoping-review-of-deep-learning-powered-voice-conversion-2311.08104"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-variational-auto-encoder-architectures-configurations-and-datasets-for-generative-music-explainable-ai-2311.08336</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-variational-auto-encoder-architectures-configurations-and-datasets-for-generative-music-explainable-ai-2311.08336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-variational-auto-encoder-architectures-configurations-and-datasets-for-generative-music-explainable-ai-2311.08336"/></url>
<url><loc>https://scifaro.com/en/abs/choralsynth-synthetic-dataset-of-choral-singing-2311.08350</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/choralsynth-synthetic-dataset-of-choral-singing-2311.08350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/choralsynth-synthetic-dataset-of-choral-singing-2311.08350"/></url>
<url><loc>https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-underwater-sound-propagation-dispersion-characteristics-under-dual-channel-sound-speed-profiles-in-the-chukchi-plateau-2311.08425</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-underwater-sound-propagation-dispersion-characteristics-under-dual-channel-sound-speed-profiles-in-the-chukchi-plateau-2311.08425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-and-experimental-verification-on-low-frequency-long-range-underwater-sound-propagation-dispersion-characteristics-under-dual-channel-sound-speed-profiles-in-the-chukchi-plateau-2311.08425"/></url>
<url><loc>https://scifaro.com/en/abs/edmsound-spectrogram-based-diffusion-models-for-efficient-and-high-quality-audio-synthesis-2311.08667</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edmsound-spectrogram-based-diffusion-models-for-efficient-and-high-quality-audio-synthesis-2311.08667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edmsound-spectrogram-based-diffusion-models-for-efficient-and-high-quality-audio-synthesis-2311.08667"/></url>
<url><loc>https://scifaro.com/en/abs/cln-vc-text-free-voice-conversion-based-on-fine-grained-style-control-and-contrastive-learning-with-negative-samples-augmentation-2311.08670</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cln-vc-text-free-voice-conversion-based-on-fine-grained-style-control-and-contrastive-learning-with-negative-samples-augmentation-2311.08670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cln-vc-text-free-voice-conversion-based-on-fine-grained-style-control-and-contrastive-learning-with-negative-samples-augmentation-2311.08670"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoder-with-group-based-decoder-and-multi-task-optimization-for-anomalous-sound-detection-2311.08829</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoder-with-group-based-decoder-and-multi-task-optimization-for-anomalous-sound-detection-2311.08829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoder-with-group-based-decoder-and-multi-task-optimization-for-anomalous-sound-detection-2311.08829"/></url>
<url><loc>https://scifaro.com/en/abs/crepe-notes-a-new-method-for-segmenting-pitch-contours-into-discrete-notes-2311.08884</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crepe-notes-a-new-method-for-segmenting-pitch-contours-into-discrete-notes-2311.08884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crepe-notes-a-new-method-for-segmenting-pitch-contours-into-discrete-notes-2311.08884"/></url>
<url><loc>https://scifaro.com/en/abs/can-musicgen-create-training-data-for-mir-tasks-2311.09094</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-musicgen-create-training-data-for-mir-tasks-2311.09094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-musicgen-create-training-data-for-mir-tasks-2311.09094"/></url>
<url><loc>https://scifaro.com/en/abs/future-full-ocean-deep-ssps-prediction-based-on-hierarchical-long-short-term-memory-neural-networks-2311.09537</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/future-full-ocean-deep-ssps-prediction-based-on-hierarchical-long-short-term-memory-neural-networks-2311.09537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/future-full-ocean-deep-ssps-prediction-based-on-hierarchical-long-short-term-memory-neural-networks-2311.09537"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-spectrogram-transformer-for-respiratory-sound-classification-2311.09655</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-spectrogram-transformer-for-respiratory-sound-classification-2311.09655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-spectrogram-transformer-for-respiratory-sound-classification-2311.09655"/></url>
<url><loc>https://scifaro.com/en/abs/dino-vits-data-efficient-zero-shot-tts-with-self-supervised-speaker-verification-loss-for-noise-robustness-2311.09770</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dino-vits-data-efficient-zero-shot-tts-with-self-supervised-speaker-verification-loss-for-noise-robustness-2311.09770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dino-vits-data-efficient-zero-shot-tts-with-self-supervised-speaker-verification-loss-for-noise-robustness-2311.09770"/></url>
<url><loc>https://scifaro.com/en/abs/the-song-describer-dataset-a-corpus-of-audio-captions-for-music-and-language-evaluation-2311.10057</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-song-describer-dataset-a-corpus-of-audio-captions-for-music-and-language-evaluation-2311.10057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-song-describer-dataset-a-corpus-of-audio-captions-for-music-and-language-evaluation-2311.10057"/></url>
<url><loc>https://scifaro.com/en/abs/aquatk-an-audio-quality-assessment-toolkit-2311.10113</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aquatk-an-audio-quality-assessment-toolkit-2311.10113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aquatk-an-audio-quality-assessment-toolkit-2311.10113"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-generation-of-symbolic-music-with-llms-2311.10384</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-generation-of-symbolic-music-with-llms-2311.10384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-generation-of-symbolic-music-with-llms-2311.10384"/></url>
<url><loc>https://scifaro.com/en/abs/the-persian-piano-corpus-a-collection-of-instrument-based-feature-extracted-data-considering-dastgah-2311.11074</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-persian-piano-corpus-a-collection-of-instrument-based-feature-extracted-data-considering-dastgah-2311.11074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-persian-piano-corpus-a-collection-of-instrument-based-feature-extracted-data-considering-dastgah-2311.11074"/></url>
<url><loc>https://scifaro.com/en/abs/m-2-ugen-multi-modal-music-understanding-and-generation-with-the-power-of-large-language-models-2311.11255</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m-2-ugen-multi-modal-music-understanding-and-generation-with-the-power-of-large-language-models-2311.11255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m-2-ugen-multi-modal-music-understanding-and-generation-with-the-power-of-large-language-models-2311.11255"/></url>
<url><loc>https://scifaro.com/en/abs/encoding-performance-data-in-mei-with-the-automatic-music-performance-analysis-and-comparison-toolkit-ampact-2311.11363</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/encoding-performance-data-in-mei-with-the-automatic-music-performance-analysis-and-comparison-toolkit-ampact-2311.11363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/encoding-performance-data-in-mei-with-the-automatic-music-performance-analysis-and-comparison-toolkit-ampact-2311.11363"/></url>
<url><loc>https://scifaro.com/en/abs/elf-encoding-speaker-specific-latent-speech-feature-for-speech-synthesis-2311.11745</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/elf-encoding-speaker-specific-latent-speech-feature-for-speech-synthesis-2311.11745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/elf-encoding-speaker-specific-latent-speech-feature-for-speech-synthesis-2311.11745"/></url>
<url><loc>https://scifaro.com/en/abs/formal-verification-of-long-short-term-memory-based-audio-classifiers-a-star-based-approach-2311.12130</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/formal-verification-of-long-short-term-memory-based-audio-classifiers-a-star-based-approach-2311.12130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/formal-verification-of-long-short-term-memory-based-audio-classifiers-a-star-based-approach-2311.12130"/></url>
<url><loc>https://scifaro.com/en/abs/improving-label-assignments-learning-by-dynamic-sample-dropout-combined-with-layer-wise-optimization-in-speech-separation-2311.12199</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-label-assignments-learning-by-dynamic-sample-dropout-combined-with-layer-wise-optimization-in-speech-separation-2311.12199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-label-assignments-learning-by-dynamic-sample-dropout-combined-with-layer-wise-optimization-in-speech-separation-2311.12199"/></url>
<url><loc>https://scifaro.com/en/abs/equipping-pretrained-unconditional-music-transformers-with-instrument-and-genre-controls-2311.12257</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/equipping-pretrained-unconditional-music-transformers-with-instrument-and-genre-controls-2311.12257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/equipping-pretrained-unconditional-music-transformers-with-instrument-and-genre-controls-2311.12257"/></url>
<url><loc>https://scifaro.com/en/abs/hierspeech-bridging-the-gap-between-semantic-and-acoustic-representation-of-speech-by-hierarchical-variational-inference-for-zero-shot-speech-synthesis-2311.12454</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierspeech-bridging-the-gap-between-semantic-and-acoustic-representation-of-speech-by-hierarchical-variational-inference-for-zero-shot-speech-synthesis-2311.12454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierspeech-bridging-the-gap-between-semantic-and-acoustic-representation-of-speech-by-hierarchical-variational-inference-for-zero-shot-speech-synthesis-2311.12454"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-music-source-separation-using-vector-quantized-source-category-estimates-2311.13058</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-music-source-separation-using-vector-quantized-source-category-estimates-2311.13058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-music-source-separation-using-vector-quantized-source-category-estimates-2311.13058"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-solve-inverse-problems-for-perceptual-sound-matching-2311.14213</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-solve-inverse-problems-for-perceptual-sound-matching-2311.14213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-solve-inverse-problems-for-perceptual-sound-matching-2311.14213"/></url>
<url><loc>https://scifaro.com/en/abs/allpass-impulse-response-modelling-2311.14239</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/allpass-impulse-response-modelling-2311.14239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/allpass-impulse-response-modelling-2311.14239"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-the-2023-icassp-sp-clarity-challenge-speech-enhancement-for-hearing-aids-2311.14490</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-the-2023-icassp-sp-clarity-challenge-speech-enhancement-for-hearing-aids-2311.14490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-the-2023-icassp-sp-clarity-challenge-speech-enhancement-for-hearing-aids-2311.14490"/></url>
<url><loc>https://scifaro.com/en/abs/tinyclap-distilling-constrastive-language-audio-pretrained-models-2311.14517</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tinyclap-distilling-constrastive-language-audio-pretrained-models-2311.14517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tinyclap-distilling-constrastive-language-audio-pretrained-models-2311.14517"/></url>
<url><loc>https://scifaro.com/en/abs/weak-alignment-supervision-from-hybrid-model-improves-end-to-end-asr-2311.14835</loc><lastmod>2023-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weak-alignment-supervision-from-hybrid-model-improves-end-to-end-asr-2311.14835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weak-alignment-supervision-from-hybrid-model-improves-end-to-end-asr-2311.14835"/></url>
<url><loc>https://scifaro.com/en/abs/custom-data-augmentation-for-low-resource-asr-using-bark-and-retrieval-based-voice-conversion-2311.14836</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/custom-data-augmentation-for-low-resource-asr-using-bark-and-retrieval-based-voice-conversion-2311.14836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/custom-data-augmentation-for-low-resource-asr-using-bark-and-retrieval-based-voice-conversion-2311.14836"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-sub-band-constant-q-transform-discriminator-for-high-fidelity-vocoder-2311.14957</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-sub-band-constant-q-transform-discriminator-for-high-fidelity-vocoder-2311.14957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-sub-band-constant-q-transform-discriminator-for-high-fidelity-vocoder-2311.14957"/></url>
<url><loc>https://scifaro.com/en/abs/lightly-weighted-automatic-audio-parameter-extraction-for-the-quality-assessment-of-consensus-auditory-perceptual-evaluation-of-voice-2311.15582</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightly-weighted-automatic-audio-parameter-extraction-for-the-quality-assessment-of-consensus-auditory-perceptual-evaluation-of-voice-2311.15582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightly-weighted-automatic-audio-parameter-extraction-for-the-quality-assessment-of-consensus-auditory-perceptual-evaluation-of-voice-2311.15582"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-aware-speaker-embedding-for-far-field-speaker-verification-2311.15627</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-aware-speaker-embedding-for-far-field-speaker-verification-2311.15627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-aware-speaker-embedding-for-far-field-speaker-verification-2311.15627"/></url>
<url><loc>https://scifaro.com/en/abs/a-jepa-joint-embedding-predictive-architecture-can-listen-2311.15830</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-jepa-joint-embedding-predictive-architecture-can-listen-2311.15830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-jepa-joint-embedding-predictive-architecture-can-listen-2311.15830"/></url>
<url><loc>https://scifaro.com/en/abs/cheapnet-improving-light-weight-speech-enhancement-network-by-projected-loss-function-2311.15959</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cheapnet-improving-light-weight-speech-enhancement-network-by-projected-loss-function-2311.15959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cheapnet-improving-light-weight-speech-enhancement-network-by-projected-loss-function-2311.15959"/></url>
<url><loc>https://scifaro.com/en/abs/d4am-a-general-denoising-framework-for-downstream-acoustic-models-2311.16595</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/d4am-a-general-denoising-framework-for-downstream-acoustic-models-2311.16595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/d4am-a-general-denoising-framework-for-downstream-acoustic-models-2311.16595"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-strauss-a-flexible-sonification-python-package-2311.16847</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-strauss-a-flexible-sonification-python-package-2311.16847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-strauss-a-flexible-sonification-python-package-2311.16847"/></url>
<url><loc>https://scifaro.com/en/abs/fat-hubert-front-end-adaptive-training-of-hidden-unit-bert-for-distortion-invariant-robust-speech-recognition-2311.17790</loc><lastmod>2023-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fat-hubert-front-end-adaptive-training-of-hidden-unit-bert-for-distortion-invariant-robust-speech-recognition-2311.17790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fat-hubert-front-end-adaptive-training-of-hidden-unit-bert-for-distortion-invariant-robust-speech-recognition-2311.17790"/></url>
<url><loc>https://scifaro.com/en/abs/string-sound-synthesizer-on-gpu-accelerated-finite-difference-scheme-2311.18505</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/string-sound-synthesizer-on-gpu-accelerated-finite-difference-scheme-2311.18505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/string-sound-synthesizer-on-gpu-accelerated-finite-difference-scheme-2311.18505"/></url>
<url><loc>https://scifaro.com/en/abs/barwise-music-structure-analysis-with-the-correlation-block-matching-segmentation-algorithm-2311.18604</loc><lastmod>2023-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/barwise-music-structure-analysis-with-the-correlation-block-matching-segmentation-algorithm-2311.18604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/barwise-music-structure-analysis-with-the-correlation-block-matching-segmentation-algorithm-2311.18604"/></url>
<url><loc>https://scifaro.com/en/abs/sound-terminology-describing-production-and-perception-of-sonification-2312.00091</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-terminology-describing-production-and-perception-of-sonification-2312.00091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-terminology-describing-production-and-perception-of-sonification-2312.00091"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-of-spatial-acoustic-representation-with-cross-channel-signal-reconstruction-and-multi-channel-conformer-2312.00476</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-of-spatial-acoustic-representation-with-cross-channel-signal-reconstruction-and-multi-channel-conformer-2312.00476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-of-spatial-acoustic-representation-with-cross-channel-signal-reconstruction-and-multi-channel-conformer-2312.00476"/></url>
<url><loc>https://scifaro.com/en/abs/av-rir-audio-visual-room-impulse-response-estimation-2312.00834</loc><lastmod>2024-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-rir-audio-visual-room-impulse-response-estimation-2312.00834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-rir-audio-visual-room-impulse-response-estimation-2312.00834"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-signal-analysis-with-deep-neural-network-for-detecting-fault-diagnosis-in-industrial-machines-2312.01062</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-signal-analysis-with-deep-neural-network-for-detecting-fault-diagnosis-in-industrial-machines-2312.01062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-signal-analysis-with-deep-neural-network-for-detecting-fault-diagnosis-in-industrial-machines-2312.01062"/></url>
<url><loc>https://scifaro.com/en/abs/a-semi-supervised-deep-learning-approach-to-dataset-collection-for-query-by-humming-task-2312.01092</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-semi-supervised-deep-learning-approach-to-dataset-collection-for-query-by-humming-task-2312.01092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-semi-supervised-deep-learning-approach-to-dataset-collection-for-query-by-humming-task-2312.01092"/></url>
<url><loc>https://scifaro.com/en/abs/openvoice-versatile-instant-voice-cloning-2312.01479</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/openvoice-versatile-instant-voice-cloning-2312.01479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/openvoice-versatile-instant-voice-cloning-2312.01479"/></url>
<url><loc>https://scifaro.com/en/abs/building-ears-for-robots-machine-hearing-in-the-age-of-autonomy-2312.01554</loc><lastmod>2023-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-ears-for-robots-machine-hearing-in-the-age-of-autonomy-2312.01554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-ears-for-robots-machine-hearing-in-the-age-of-autonomy-2312.01554"/></url>
<url><loc>https://scifaro.com/en/abs/a-text-dependent-speaker-verification-application-framework-based-on-chinese-numerical-string-corpus-2312.01645</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-text-dependent-speaker-verification-application-framework-based-on-chinese-numerical-string-corpus-2312.01645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-text-dependent-speaker-verification-application-framework-based-on-chinese-numerical-string-corpus-2312.01645"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-viability-of-synthetic-audio-data-for-audio-based-dialogue-state-tracking-2312.01842</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-viability-of-synthetic-audio-data-for-audio-based-dialogue-state-tracking-2312.01842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-viability-of-synthetic-audio-data-for-audio-based-dialogue-state-tracking-2312.01842"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-data-generation-techniques-for-developing-ai-based-speech-assessments-for-parkinson-s-disease-a-comparative-study-2312.02229</loc><lastmod>2023-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-data-generation-techniques-for-developing-ai-based-speech-assessments-for-parkinson-s-disease-a-comparative-study-2312.02229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-data-generation-techniques-for-developing-ai-based-speech-assessments-for-parkinson-s-disease-a-comparative-study-2312.02229"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-plug-and-play-data-priors-with-weighted-prediction-error-for-speech-dereverberation-2312.02773</loc><lastmod>2023-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-plug-and-play-data-priors-with-weighted-prediction-error-for-speech-dereverberation-2312.02773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-plug-and-play-data-priors-with-weighted-prediction-error-for-speech-dereverberation-2312.02773"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-voice-cloning-attacks-via-timbre-watermarking-2312.03410</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-voice-cloning-attacks-via-timbre-watermarking-2312.03410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-voice-cloning-attacks-via-timbre-watermarking-2312.03410"/></url>
<url><loc>https://scifaro.com/en/abs/data-is-overrated-perceptual-metrics-can-lead-learning-in-the-absence-of-training-data-2312.03455</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-is-overrated-perceptual-metrics-can-lead-learning-in-the-absence-of-training-data-2312.03455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-is-overrated-perceptual-metrics-can-lead-learning-in-the-absence-of-training-data-2312.03455"/></url>
<url><loc>https://scifaro.com/en/abs/jammin-gpt-text-based-improvisation-using-llms-in-ableton-live-2312.03479</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jammin-gpt-text-based-improvisation-using-llms-in-ableton-live-2312.03479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jammin-gpt-text-based-improvisation-using-llms-in-ableton-live-2312.03479"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-data-and-resource-efficient-device-directed-speech-detection-with-large-foundation-models-2312.03632</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-data-and-resource-efficient-device-directed-speech-detection-with-large-foundation-models-2312.03632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-data-and-resource-efficient-device-directed-speech-detection-with-large-foundation-models-2312.03632"/></url>
<url><loc>https://scifaro.com/en/abs/towards-small-and-accurate-convolutional-neural-networks-for-acoustic-biodiversity-monitoring-2312.03666</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-small-and-accurate-convolutional-neural-networks-for-acoustic-biodiversity-monitoring-2312.03666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-small-and-accurate-convolutional-neural-networks-for-acoustic-biodiversity-monitoring-2312.03666"/></url>
<url><loc>https://scifaro.com/en/abs/sound-source-localization-for-a-source-inside-a-structure-using-ac-cyclegan-2312.04846</loc><lastmod>2023-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-source-localization-for-a-source-inside-a-structure-using-ac-cyclegan-2312.04846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-source-localization-for-a-source-inside-a-structure-using-ac-cyclegan-2312.04846"/></url>
<url><loc>https://scifaro.com/en/abs/neural-concatenative-singing-voice-conversion-rethinking-concatenation-based-approach-for-one-shot-singing-voice-conversion-2312.04919</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-concatenative-singing-voice-conversion-rethinking-concatenation-based-approach-for-one-shot-singing-voice-conversion-2312.04919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-concatenative-singing-voice-conversion-rethinking-concatenation-based-approach-for-one-shot-singing-voice-conversion-2312.04919"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-study-assessing-the-combined-framework-of-wavlm-and-best-rq-for-text-to-speech-synthesis-2312.05415</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-study-assessing-the-combined-framework-of-wavlm-and-best-rq-for-text-to-speech-synthesis-2312.05415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-study-assessing-the-combined-framework-of-wavlm-and-best-rq-for-text-to-speech-synthesis-2312.05415"/></url>
<url><loc>https://scifaro.com/en/abs/keyword-spotting-detecting-commands-in-speech-using-deep-learning-2312.05640</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keyword-spotting-detecting-commands-in-speech-using-deep-learning-2312.05640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keyword-spotting-detecting-commands-in-speech-using-deep-learning-2312.05640"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-detection-vad-in-noisy-environments-2312.05815</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-detection-vad-in-noisy-environments-2312.05815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-detection-vad-in-noisy-environments-2312.05815"/></url>
<url><loc>https://scifaro.com/en/abs/mir-ref-a-representation-evaluation-framework-for-music-information-retrieval-tasks-2312.05994</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mir-ref-a-representation-evaluation-framework-for-music-information-retrieval-tasks-2312.05994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mir-ref-a-representation-evaluation-framework-for-music-information-retrieval-tasks-2312.05994"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-text-retrieval-via-contrastive-learning-2312.06055</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-text-retrieval-via-contrastive-learning-2312.06055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-text-retrieval-via-contrastive-learning-2312.06055"/></url>
<url><loc>https://scifaro.com/en/abs/rose-a-recognition-oriented-speech-enhancement-framework-in-air-traffic-control-using-multi-objective-learning-2312.06118</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rose-a-recognition-oriented-speech-enhancement-framework-in-air-traffic-control-using-multi-objective-learning-2312.06118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rose-a-recognition-oriented-speech-enhancement-framework-in-air-traffic-control-using-multi-objective-learning-2312.06118"/></url>
<url><loc>https://scifaro.com/en/abs/mart-learning-hierarchical-music-audio-representations-with-part-whole-transformer-2312.06197</loc><lastmod>2024-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mart-learning-hierarchical-music-audio-representations-with-part-whole-transformer-2312.06197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mart-learning-hierarchical-music-audio-representations-with-part-whole-transformer-2312.06197"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-attractors-for-robust-and-efficient-end-to-end-neural-diarization-2312.06253</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-attractors-for-robust-and-efficient-end-to-end-neural-diarization-2312.06253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-attractors-for-robust-and-efficient-end-to-end-neural-diarization-2312.06253"/></url>
<url><loc>https://scifaro.com/en/abs/deep-imbalanced-learning-for-multimodal-emotion-recognition-in-conversations-2312.06337</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-imbalanced-learning-for-multimodal-emotion-recognition-in-conversations-2312.06337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-imbalanced-learning-for-multimodal-emotion-recognition-in-conversations-2312.06337"/></url>
<url><loc>https://scifaro.com/en/abs/towards-domain-specific-cross-corpus-speech-emotion-recognition-approach-2312.06466</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-domain-specific-cross-corpus-speech-emotion-recognition-approach-2312.06466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-domain-specific-cross-corpus-speech-emotion-recognition-approach-2312.06466"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-cnn-network-for-audio-signature-analysis-in-noisy-environments-2312.07059</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-cnn-network-for-audio-signature-analysis-in-noisy-environments-2312.07059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-cnn-network-for-audio-signature-analysis-in-noisy-environments-2312.07059"/></url>
<url><loc>https://scifaro.com/en/abs/robust-end-to-end-diarization-with-domain-adaptive-training-and-multi-task-learning-2312.07136</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-end-to-end-diarization-with-domain-adaptive-training-and-multi-task-learning-2312.07136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-end-to-end-diarization-with-domain-adaptive-training-and-multi-task-learning-2312.07136"/></url>
<url><loc>https://scifaro.com/en/abs/improving-spatial-resolution-of-first-order-ambisonics-using-sparse-mdct-representation-2312.08069</loc><lastmod>2023-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-spatial-resolution-of-first-order-ambisonics-using-sparse-mdct-representation-2312.08069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-spatial-resolution-of-first-order-ambisonics-using-sparse-mdct-representation-2312.08069"/></url>
<url><loc>https://scifaro.com/en/abs/permod-perceptually-grounded-voice-modification-with-latent-diffusion-models-2312.08494</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/permod-perceptually-grounded-voice-modification-with-latent-diffusion-models-2312.08494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/permod-perceptually-grounded-voice-modification-with-latent-diffusion-models-2312.08494"/></url>
<url><loc>https://scifaro.com/en/abs/phaseperturbation-speech-data-augmentation-via-phase-perturbation-for-automatic-speech-recognition-2312.08571</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phaseperturbation-speech-data-augmentation-via-phase-perturbation-for-automatic-speech-recognition-2312.08571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phaseperturbation-speech-data-augmentation-via-phase-perturbation-for-automatic-speech-recognition-2312.08571"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-constrained-multichannel-signal-denoising-considering-channel-dependent-sensitivity-inspired-by-self-supervised-learning-for-optical-fiber-sensing-2312.08660</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-constrained-multichannel-signal-denoising-considering-channel-dependent-sensitivity-inspired-by-self-supervised-learning-for-optical-fiber-sensing-2312.08660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-constrained-multichannel-signal-denoising-considering-channel-dependent-sensitivity-inspired-by-self-supervised-learning-for-optical-fiber-sensing-2312.08660"/></url>
<url><loc>https://scifaro.com/en/abs/sef-vc-speaker-embedding-free-zero-shot-voice-conversion-with-cross-attention-2312.08676</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sef-vc-speaker-embedding-free-zero-shot-voice-conversion-with-cross-attention-2312.08676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sef-vc-speaker-embedding-free-zero-shot-voice-conversion-with-cross-attention-2312.08676"/></url>
<url><loc>https://scifaro.com/en/abs/stemgen-a-music-generation-model-that-listens-2312.08723</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stemgen-a-music-generation-model-that-listens-2312.08723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stemgen-a-music-generation-model-that-listens-2312.08723"/></url>
<url><loc>https://scifaro.com/en/abs/tia-a-teaching-intonation-assessment-dataset-in-real-teaching-situations-2312.08732</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tia-a-teaching-intonation-assessment-dataset-in-real-teaching-situations-2312.08732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tia-a-teaching-intonation-assessment-dataset-in-real-teaching-situations-2312.08732"/></url>
<url><loc>https://scifaro.com/en/abs/hourglass-avsr-down-up-sampling-based-computational-efficiency-model-for-audio-visual-speech-recognition-2312.08850</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hourglass-avsr-down-up-sampling-based-computational-efficiency-model-for-audio-visual-speech-recognition-2312.08850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hourglass-avsr-down-up-sampling-based-computational-efficiency-model-for-audio-visual-speech-recognition-2312.08850"/></url>
<url><loc>https://scifaro.com/en/abs/n-gram-unsupervised-compoundation-and-feature-injection-for-better-symbolic-music-understanding-2312.08931</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/n-gram-unsupervised-compoundation-and-feature-injection-for-better-symbolic-music-understanding-2312.08931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/n-gram-unsupervised-compoundation-and-feature-injection-for-better-symbolic-music-understanding-2312.08931"/></url>
<url><loc>https://scifaro.com/en/abs/multi-cmgan-leveraging-multi-objective-speech-quality-metric-prediction-for-speech-enhancement-2312.08979</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-cmgan-leveraging-multi-objective-speech-quality-metric-prediction-for-speech-enhancement-2312.08979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-cmgan-leveraging-multi-objective-speech-quality-metric-prediction-for-speech-enhancement-2312.08979"/></url>
<url><loc>https://scifaro.com/en/abs/star-distilling-speech-temporal-relation-for-lightweight-speech-self-supervised-learning-models-2312.09040</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/star-distilling-speech-temporal-relation-for-lightweight-speech-self-supervised-learning-models-2312.09040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/star-distilling-speech-temporal-relation-for-lightweight-speech-self-supervised-learning-models-2312.09040"/></url>
<url><loc>https://scifaro.com/en/abs/f1-ev-score-measuring-the-likelihood-of-estimating-a-good-decision-threshold-for-semi-supervised-anomaly-detection-2312.09143</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f1-ev-score-measuring-the-likelihood-of-estimating-a-good-decision-threshold-for-semi-supervised-anomaly-detection-2312.09143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f1-ev-score-measuring-the-likelihood-of-estimating-a-good-decision-threshold-for-semi-supervised-anomaly-detection-2312.09143"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-models-of-brazilian-portuguese-speech-based-on-neural-transformers-2312.09265</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-models-of-brazilian-portuguese-speech-based-on-neural-transformers-2312.09265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-models-of-brazilian-portuguese-speech-based-on-neural-transformers-2312.09265"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-speech-detection-in-environmental-audio-using-acoustic-recognition-and-knowledge-distillation-2312.09269</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-speech-detection-in-environmental-audio-using-acoustic-recognition-and-knowledge-distillation-2312.09269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-speech-detection-in-environmental-audio-using-acoustic-recognition-and-knowledge-distillation-2312.09269"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-fine-tuning-of-audio-only-asr-models-2312.09369</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-fine-tuning-of-audio-only-asr-models-2312.09369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-fine-tuning-of-audio-only-asr-models-2312.09369"/></url>
<url><loc>https://scifaro.com/en/abs/a-1-6-mw-sparse-deep-learning-accelerator-for-speech-separation-2312.09580</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-1-6-mw-sparse-deep-learning-accelerator-for-speech-separation-2312.09580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-1-6-mw-sparse-deep-learning-accelerator-for-speech-separation-2312.09580"/></url>
<url><loc>https://scifaro.com/en/abs/stethoscope-guided-supervised-contrastive-learning-for-cross-domain-adaptation-on-respiratory-sound-classification-2312.09603</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stethoscope-guided-supervised-contrastive-learning-for-cross-domain-adaptation-on-respiratory-sound-classification-2312.09603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stethoscope-guided-supervised-contrastive-learning-for-cross-domain-adaptation-on-respiratory-sound-classification-2312.09603"/></url>
<url><loc>https://scifaro.com/en/abs/what-to-remember-self-adaptive-continual-learning-for-audio-deepfake-detection-2312.09651</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-to-remember-self-adaptive-continual-learning-for-audio-deepfake-detection-2312.09651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-to-remember-self-adaptive-continual-learning-for-audio-deepfake-detection-2312.09651"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-channel-selection-and-spatial-feature-integration-for-multi-channel-speech-recognition-across-various-array-topologies-2312.09746</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-channel-selection-and-spatial-feature-integration-for-multi-channel-speech-recognition-across-various-array-topologies-2312.09746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-channel-selection-and-spatial-feature-integration-for-multi-channel-speech-recognition-across-various-array-topologies-2312.09746"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-compression-of-shallow-non-causal-asr-models-using-knowledge-distillation-and-tied-and-reduced-decoder-for-low-latency-on-device-speech-recognition-2312.09842</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-compression-of-shallow-non-causal-asr-models-using-knowledge-distillation-and-tied-and-reduced-decoder-for-low-latency-on-device-speech-recognition-2312.09842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-compression-of-shallow-non-causal-asr-models-using-knowledge-distillation-and-tied-and-reduced-decoder-for-low-latency-on-device-speech-recognition-2312.09842"/></url>
<url><loc>https://scifaro.com/en/abs/amphion-an-open-source-audio-music-and-speech-generation-toolkit-2312.09911</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amphion-an-open-source-audio-music-and-speech-generation-toolkit-2312.09911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amphion-an-open-source-audio-music-and-speech-generation-toolkit-2312.09911"/></url>
<url><loc>https://scifaro.com/en/abs/vocopilot-voice-activated-tracking-of-everyday-interactions-2312.10265</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocopilot-voice-activated-tracking-of-everyday-interactions-2312.10265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocopilot-voice-activated-tracking-of-everyday-interactions-2312.10265"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-disentangled-representation-learning-for-robust-target-speech-extraction-2312.10305</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-disentangled-representation-learning-for-robust-target-speech-extraction-2312.10305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-disentangled-representation-learning-for-robust-target-speech-extraction-2312.10305"/></url>
<url><loc>https://scifaro.com/en/abs/muser-musical-element-based-regularization-for-generating-symbolic-music-with-emotion-2312.10307</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muser-musical-element-based-regularization-for-generating-symbolic-music-with-emotion-2312.10307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muser-musical-element-based-regularization-for-generating-symbolic-music-with-emotion-2312.10307"/></url>
<url><loc>https://scifaro.com/en/abs/secap-speech-emotion-captioning-with-large-language-model-2312.10381</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/secap-speech-emotion-captioning-with-large-language-model-2312.10381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/secap-speech-emotion-captioning-with-large-language-model-2312.10381"/></url>
<url><loc>https://scifaro.com/en/abs/annotation-free-automatic-music-transcription-with-scalable-synthetic-data-and-adversarial-domain-confusion-2312.10402</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/annotation-free-automatic-music-transcription-with-scalable-synthetic-data-and-adversarial-domain-confusion-2312.10402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/annotation-free-automatic-music-transcription-with-scalable-synthetic-data-and-adversarial-domain-confusion-2312.10402"/></url>
<url><loc>https://scifaro.com/en/abs/seq2seq-for-automatic-paraphasia-detection-in-aphasic-speech-2312.10518</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seq2seq-for-automatic-paraphasia-detection-in-aphasic-speech-2312.10518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seq2seq-for-automatic-paraphasia-detection-in-aphasic-speech-2312.10518"/></url>
<url><loc>https://scifaro.com/en/abs/meta-af-echo-cancellation-for-improved-keyword-spotting-2312.10605</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-af-echo-cancellation-for-improved-keyword-spotting-2312.10605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-af-echo-cancellation-for-improved-keyword-spotting-2312.10605"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-sound-vs-vibration-for-robust-fault-detection-on-rotating-machinery-2312.10742</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-sound-vs-vibration-for-robust-fault-detection-on-rotating-machinery-2312.10742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-sound-vs-vibration-for-robust-fault-detection-on-rotating-machinery-2312.10742"/></url>
<url><loc>https://scifaro.com/en/abs/an-extended-variational-mode-decomposition-algorithm-developed-speech-emotion-recognition-performance-2312.10937</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-extended-variational-mode-decomposition-algorithm-developed-speech-emotion-recognition-performance-2312.10937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-extended-variational-mode-decomposition-algorithm-developed-speech-emotion-recognition-performance-2312.10937"/></url>
<url><loc>https://scifaro.com/en/abs/leveraged-mel-spectrograms-using-harmonic-and-percussive-components-in-speech-emotion-recognition-2312.10949</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraged-mel-spectrograms-using-harmonic-and-percussive-components-in-speech-emotion-recognition-2312.10949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraged-mel-spectrograms-using-harmonic-and-percussive-components-in-speech-emotion-recognition-2312.10949"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-mask-transformer-for-multi-talker-overlapped-speech-recognition-2312.10959</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-mask-transformer-for-multi-talker-overlapped-speech-recognition-2312.10959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-mask-transformer-for-multi-talker-overlapped-speech-recognition-2312.10959"/></url>
<url><loc>https://scifaro.com/en/abs/3s-tse-efficient-three-stage-target-speaker-extraction-for-real-time-and-low-resource-applications-2312.10979</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3s-tse-efficient-three-stage-target-speaker-extraction-for-real-time-and-low-resource-applications-2312.10979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3s-tse-efficient-three-stage-target-speaker-extraction-for-real-time-and-low-resource-applications-2312.10979"/></url>
<url><loc>https://scifaro.com/en/abs/improved-long-form-speech-recognition-by-jointly-modeling-the-primary-and-non-primary-speakers-2312.11123</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-long-form-speech-recognition-by-jointly-modeling-the-primary-and-non-primary-speakers-2312.11123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-long-form-speech-recognition-by-jointly-modeling-the-primary-and-non-primary-speakers-2312.11123"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-musical-features-for-interpretable-audio-tagging-2312.11234</loc><lastmod>2024-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-musical-features-for-interpretable-audio-tagging-2312.11234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-musical-features-for-interpretable-audio-tagging-2312.11234"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-barlow-twins-and-vicreg-self-supervised-learning-for-sound-patterns-of-bird-and-anuran-species-2312.11240</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-barlow-twins-and-vicreg-self-supervised-learning-for-sound-patterns-of-bird-and-anuran-species-2312.11240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-barlow-twins-and-vicreg-self-supervised-learning-for-sound-patterns-of-bird-and-anuran-species-2312.11240"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-based-study-on-different-text-to-speech-technologies-2312.11563</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-based-study-on-different-text-to-speech-technologies-2312.11563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-based-study-on-different-text-to-speech-technologies-2312.11563"/></url>
<url><loc>https://scifaro.com/en/abs/mossformer2-combining-transformer-and-rnn-free-recurrent-network-for-enhanced-time-domain-monaural-speech-separation-2312.11825</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mossformer2-combining-transformer-and-rnn-free-recurrent-network-for-enhanced-time-domain-monaural-speech-separation-2312.11825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mossformer2-combining-transformer-and-rnn-free-recurrent-network-for-enhanced-time-domain-monaural-speech-separation-2312.11825"/></url>
<url><loc>https://scifaro.com/en/abs/ms-senet-enhancing-speech-emotion-recognition-through-multi-scale-feature-fusion-with-squeeze-and-excitation-blocks-2312.11974</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ms-senet-enhancing-speech-emotion-recognition-through-multi-scale-feature-fusion-with-squeeze-and-excitation-blocks-2312.11974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ms-senet-enhancing-speech-emotion-recognition-through-multi-scale-feature-fusion-with-squeeze-and-excitation-blocks-2312.11974"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-distillation-of-self-supervised-speech-models-via-correlation-metrics-2312.12153</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-distillation-of-self-supervised-speech-models-via-correlation-metrics-2312.12153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-distillation-of-self-supervised-speech-models-via-correlation-metrics-2312.12153"/></url>
<url><loc>https://scifaro.com/en/abs/stylespeech-self-supervised-style-enhancing-with-vq-vae-based-pre-training-for-expressive-audiobook-speech-synthesis-2312.12181</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylespeech-self-supervised-style-enhancing-with-vq-vae-based-pre-training-for-expressive-audiobook-speech-synthesis-2312.12181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylespeech-self-supervised-style-enhancing-with-vq-vae-based-pre-training-for-expressive-audiobook-speech-synthesis-2312.12181"/></url>
<url><loc>https://scifaro.com/en/abs/underwater-acoustic-signal-recognition-based-on-salient-feature-2312.13143</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underwater-acoustic-signal-recognition-based-on-salient-feature-2312.13143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underwater-acoustic-signal-recognition-based-on-salient-feature-2312.13143"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-knowledge-distillation-for-speech-emotion-recognition-in-noisy-conditions-2312.13556</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-knowledge-distillation-for-speech-emotion-recognition-in-noisy-conditions-2312.13556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-knowledge-distillation-for-speech-emotion-recognition-in-noisy-conditions-2312.13556"/></url>
<url><loc>https://scifaro.com/en/abs/knn-ctc-enhancing-asr-via-retrieval-of-ctc-pseudo-labels-2312.13560</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knn-ctc-enhancing-asr-via-retrieval-of-ctc-pseudo-labels-2312.13560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knn-ctc-enhancing-asr-via-retrieval-of-ctc-pseudo-labels-2312.13560"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-disentangled-representation-learning-for-multimodal-emotion-recognition-2312.13567</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-disentangled-representation-learning-for-multimodal-emotion-recognition-2312.13567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-disentangled-representation-learning-for-multimodal-emotion-recognition-2312.13567"/></url>
<url><loc>https://scifaro.com/en/abs/bae-net-a-low-complexity-and-high-fidelity-bandwidth-adaptive-neural-network-for-speech-super-resolution-2312.13722</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bae-net-a-low-complexity-and-high-fidelity-bandwidth-adaptive-neural-network-for-speech-super-resolution-2312.13722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bae-net-a-low-complexity-and-high-fidelity-bandwidth-adaptive-neural-network-for-speech-super-resolution-2312.13722"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-adaptive-av-fusion-module-for-pre-trained-asr-models-2312.13873</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-adaptive-av-fusion-module-for-pre-trained-asr-models-2312.13873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-adaptive-av-fusion-module-for-pre-trained-asr-models-2312.13873"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-choice-of-the-optimal-temporal-support-for-audio-classification-with-pre-trained-embeddings-2312.14005</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-choice-of-the-optimal-temporal-support-for-audio-classification-with-pre-trained-embeddings-2312.14005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-choice-of-the-optimal-temporal-support-for-audio-classification-with-pre-trained-embeddings-2312.14005"/></url>
<url><loc>https://scifaro.com/en/abs/total-variation-in-popular-rap-vocals-from-2009-2023-extension-of-the-analysis-by-georgieva-ripolles-mcfee-2312.14036</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/total-variation-in-popular-rap-vocals-from-2009-2023-extension-of-the-analysis-by-georgieva-ripolles-mcfee-2312.14036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/total-variation-in-popular-rap-vocals-from-2009-2023-extension-of-the-analysis-by-georgieva-ripolles-mcfee-2312.14036"/></url>
<url><loc>https://scifaro.com/en/abs/zmm-tts-zero-shot-multilingual-and-multispeaker-speech-synthesis-conditioned-on-self-supervised-discrete-speech-representations-2312.14398</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zmm-tts-zero-shot-multilingual-and-multispeaker-speech-synthesis-conditioned-on-self-supervised-discrete-speech-representations-2312.14398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zmm-tts-zero-shot-multilingual-and-multispeaker-speech-synthesis-conditioned-on-self-supervised-discrete-speech-representations-2312.14398"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-harmonic-parameter-estimation-using-differentiable-dsp-and-spectral-optimal-transport-2312.14507</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-harmonic-parameter-estimation-using-differentiable-dsp-and-spectral-optimal-transport-2312.14507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-harmonic-parameter-estimation-using-differentiable-dsp-and-spectral-optimal-transport-2312.14507"/></url>
<url><loc>https://scifaro.com/en/abs/creating-new-voices-using-normalizing-flows-2312.14569</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-new-voices-using-normalizing-flows-2312.14569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-new-voices-using-normalizing-flows-2312.14569"/></url>
<url><loc>https://scifaro.com/en/abs/room-acoustic-rendering-networks-with-control-of-scattering-and-early-reflections-2312.14658</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-acoustic-rendering-networks-with-control-of-scattering-and-early-reflections-2312.14658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-acoustic-rendering-networks-with-control-of-scattering-and-early-reflections-2312.14658"/></url>
<url><loc>https://scifaro.com/en/abs/the-effects-of-signal-to-noise-ratio-on-generative-adversarial-networks-applied-to-marine-bioacoustic-data-2312.14806</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effects-of-signal-to-noise-ratio-on-generative-adversarial-networks-applied-to-marine-bioacoustic-data-2312.14806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effects-of-signal-to-noise-ratio-on-generative-adversarial-networks-applied-to-marine-bioacoustic-data-2312.14806"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-vad-systems-based-on-multi-task-learning-with-improved-model-structures-2312.14860</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-vad-systems-based-on-multi-task-learning-with-improved-model-structures-2312.14860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-vad-systems-based-on-multi-task-learning-with-improved-model-structures-2312.14860"/></url>
<url><loc>https://scifaro.com/en/abs/saic-integration-of-speech-anonymization-and-identity-classification-2312.15190</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saic-integration-of-speech-anonymization-and-identity-classification-2312.15190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saic-integration-of-speech-anonymization-and-identity-classification-2312.15190"/></url>
<url><loc>https://scifaro.com/en/abs/transface-unit-based-audio-visual-speech-synthesizer-for-talking-head-translation-2312.15197</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transface-unit-based-audio-visual-speech-synthesizer-for-talking-head-translation-2312.15197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transface-unit-based-audio-visual-speech-synthesizer-for-talking-head-translation-2312.15197"/></url>
<url><loc>https://scifaro.com/en/abs/combinatorial-music-generation-model-with-song-structure-graph-analysis-2312.15400</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combinatorial-music-generation-model-with-song-structure-graph-analysis-2312.15400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combinatorial-music-generation-model-with-song-structure-graph-analysis-2312.15400"/></url>
<url><loc>https://scifaro.com/en/abs/dsnet-disentangled-siamese-network-with-neutral-calibration-for-speech-emotion-recognition-2312.15593</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsnet-disentangled-siamese-network-with-neutral-calibration-for-speech-emotion-recognition-2312.15593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsnet-disentangled-siamese-network-with-neutral-calibration-for-speech-emotion-recognition-2312.15593"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-as-a-predictor-leveraging-self-supervised-learning-for-zero-shot-mos-prediction-2312.15616</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-as-a-predictor-leveraging-self-supervised-learning-for-zero-shot-mos-prediction-2312.15616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-as-a-predictor-leveraging-self-supervised-learning-for-zero-shot-mos-prediction-2312.15616"/></url>
<url><loc>https://scifaro.com/en/abs/balanced-snr-aware-distillation-for-guided-text-to-audio-generation-2312.15628</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/balanced-snr-aware-distillation-for-guided-text-to-audio-generation-2312.15628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/balanced-snr-aware-distillation-for-guided-text-to-audio-generation-2312.15628"/></url>
<url><loc>https://scifaro.com/en/abs/audiobox-unified-audio-generation-with-natural-language-prompts-2312.15821</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiobox-unified-audio-generation-with-natural-language-prompts-2312.15821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiobox-unified-audio-generation-with-natural-language-prompts-2312.15821"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-few-shot-bird-sound-classification-2312.15824</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-few-shot-bird-sound-classification-2312.15824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-few-shot-bird-sound-classification-2312.15824"/></url>
<url><loc>https://scifaro.com/en/abs/enchantdance-unveiling-the-potential-of-music-driven-dance-movement-2312.15946</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enchantdance-unveiling-the-potential-of-music-driven-dance-movement-2312.15946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enchantdance-unveiling-the-potential-of-music-driven-dance-movement-2312.15946"/></url>
<url><loc>https://scifaro.com/en/abs/soundcount-sound-counting-from-raw-audio-with-dyadic-decomposition-neural-network-2312.16149</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundcount-sound-counting-from-raw-audio-with-dyadic-decomposition-neural-network-2312.16149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundcount-sound-counting-from-raw-audio-with-dyadic-decomposition-neural-network-2312.16149"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-salient-representations-and-label-variance-in-dimensional-speech-emotion-analysis-2312.16180</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-salient-representations-and-label-variance-in-dimensional-speech-emotion-analysis-2312.16180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-salient-representations-and-label-variance-in-dimensional-speech-emotion-analysis-2312.16180"/></url>
<url><loc>https://scifaro.com/en/abs/deformable-audio-transformer-for-audio-event-detection-2312.16228</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deformable-audio-transformer-for-audio-event-detection-2312.16228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deformable-audio-transformer-for-audio-event-detection-2312.16228"/></url>
<url><loc>https://scifaro.com/en/abs/frame-level-emotional-state-alignment-method-for-speech-emotion-recognition-2312.16383</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-level-emotional-state-alignment-method-for-speech-emotion-recognition-2312.16383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-level-emotional-state-alignment-method-for-speech-emotion-recognition-2312.16383"/></url>
<url><loc>https://scifaro.com/en/abs/ae-flow-autoencoder-normalizing-flow-2312.16552</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ae-flow-autoencoder-normalizing-flow-2312.16552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ae-flow-autoencoder-normalizing-flow-2312.16552"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-pretraining-for-robust-personalized-voice-activity-detection-in-adverse-conditions-2312.16613</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-pretraining-for-robust-personalized-voice-activity-detection-in-adverse-conditions-2312.16613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-pretraining-for-robust-personalized-voice-activity-detection-in-adverse-conditions-2312.16613"/></url>
<url><loc>https://scifaro.com/en/abs/remixed2remixed-domain-adaptation-for-speech-enhancement-by-noise2noise-learning-with-remixing-2312.16836</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remixed2remixed-domain-adaptation-for-speech-enhancement-by-noise2noise-learning-with-remixing-2312.16836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remixed2remixed-domain-adaptation-for-speech-enhancement-by-noise2noise-learning-with-remixing-2312.16836"/></url>
<url><loc>https://scifaro.com/en/abs/accent-vits-accent-transfer-for-end-to-end-tts-2312.16850</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-vits-accent-transfer-for-end-to-end-tts-2312.16850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-vits-accent-transfer-for-end-to-end-tts-2312.16850"/></url>
<url><loc>https://scifaro.com/en/abs/jeffreys-divergence-based-regularization-of-neural-network-output-distribution-applied-to-speaker-recognition-2312.16885</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jeffreys-divergence-based-regularization-of-neural-network-output-distribution-applied-to-speaker-recognition-2312.16885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jeffreys-divergence-based-regularization-of-neural-network-output-distribution-applied-to-speaker-recognition-2312.16885"/></url>
<url><loc>https://scifaro.com/en/abs/beast-online-joint-beat-and-downbeat-tracking-based-on-streaming-transformer-2312.17156</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beast-online-joint-beat-and-downbeat-tracking-based-on-streaming-transformer-2312.17156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beast-online-joint-beat-and-downbeat-tracking-based-on-streaming-transformer-2312.17156"/></url>
<url><loc>https://scifaro.com/en/abs/revolutionizing-personalized-voice-synthesis-the-journey-towards-emotional-and-individual-authenticity-with-divse-dynamic-individual-voice-synthesis-engine-2312.17281</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revolutionizing-personalized-voice-synthesis-the-journey-towards-emotional-and-individual-authenticity-with-divse-dynamic-individual-voice-synthesis-engine-2312.17281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revolutionizing-personalized-voice-synthesis-the-journey-towards-emotional-and-individual-authenticity-with-divse-dynamic-individual-voice-synthesis-engine-2312.17281"/></url>
<url><loc>https://scifaro.com/en/abs/the-arrow-of-time-in-music-revisiting-the-temporal-structure-of-music-with-distinguishability-and-unique-orientability-as-the-anchor-point-2312.17633</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-arrow-of-time-in-music-revisiting-the-temporal-structure-of-music-with-distinguishability-and-unique-orientability-as-the-anchor-point-2312.17633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-arrow-of-time-in-music-revisiting-the-temporal-structure-of-music-with-distinguishability-and-unique-orientability-as-the-anchor-point-2312.17633"/></url>
<url><loc>https://scifaro.com/en/abs/ai-and-tempo-estimation-a-review-2401.00209</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-and-tempo-estimation-a-review-2401.00209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-and-tempo-estimation-a-review-2401.00209"/></url>
<url><loc>https://scifaro.com/en/abs/online-symbolic-music-alignment-with-offline-reinforcement-learning-2401.00466</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-symbolic-music-alignment-with-offline-reinforcement-learning-2401.00466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-symbolic-music-alignment-with-offline-reinforcement-learning-2401.00466"/></url>
<url><loc>https://scifaro.com/en/abs/sounding-out-reconstruction-error-based-evaluation-of-generative-models-of-expressive-performance-2401.00471</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sounding-out-reconstruction-error-based-evaluation-of-generative-models-of-expressive-performance-2401.00471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sounding-out-reconstruction-error-based-evaluation-of-generative-models-of-expressive-performance-2401.00471"/></url>
<url><loc>https://scifaro.com/en/abs/e-chat-emotion-sensitive-spoken-dialogue-system-with-large-language-models-2401.00475</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e-chat-emotion-sensitive-spoken-dialogue-system-with-large-language-models-2401.00475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e-chat-emotion-sensitive-spoken-dialogue-system-with-large-language-models-2401.00475"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-pre-trained-asr-system-fine-tuning-for-dysarthric-speech-recognition-using-adversarial-data-augmentation-2401.00662</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-pre-trained-asr-system-fine-tuning-for-dysarthric-speech-recognition-using-adversarial-data-augmentation-2401.00662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-pre-trained-asr-system-fine-tuning-for-dysarthric-speech-recognition-using-adversarial-data-augmentation-2401.00662"/></url>
<url><loc>https://scifaro.com/en/abs/auffusion-leveraging-the-power-of-diffusion-and-large-language-models-for-text-to-audio-generation-2401.01044</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auffusion-leveraging-the-power-of-diffusion-and-large-language-models-for-text-to-audio-generation-2401.01044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auffusion-leveraging-the-power-of-diffusion-and-large-language-models-for-text-to-audio-generation-2401.01044"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-multi-modal-control-in-music-driven-dance-generation-2401.01382</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-multi-modal-control-in-music-driven-dance-generation-2401.01382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-multi-modal-control-in-music-driven-dance-generation-2401.01382"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-fastpitch-chunk-based-high-quality-text-to-speech-2401.01755</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-fastpitch-chunk-based-high-quality-text-to-speech-2401.01755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-fastpitch-chunk-based-high-quality-text-to-speech-2401.01755"/></url>
<url><loc>https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-the-sinkhorn-divergence-source-model-for-blind-source-separation-2401.01762</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-the-sinkhorn-divergence-source-model-for-blind-source-separation-2401.01762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-the-sinkhorn-divergence-source-model-for-blind-source-separation-2401.01762"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-blind-speech-source-separation-with-a-disjoint-constraint-source-model-2401.01763</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-blind-speech-source-separation-with-a-disjoint-constraint-source-model-2401.01763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-blind-speech-source-separation-with-a-disjoint-constraint-source-model-2401.01763"/></url>
<url><loc>https://scifaro.com/en/abs/generating-rhythm-game-music-with-jukebox-2401.01997</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-rhythm-game-music-with-jukebox-2401.01997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-rhythm-game-music-with-jukebox-2401.01997"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-zero-shot-multi-speaker-tts-with-negated-speaker-representations-2401.02014</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-zero-shot-multi-speaker-tts-with-negated-speaker-representations-2401.02014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-zero-shot-multi-speaker-tts-with-negated-speaker-representations-2401.02014"/></url>
<url><loc>https://scifaro.com/en/abs/poscuda-position-based-convolution-for-unlearnable-audio-datasets-2401.02135</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/poscuda-position-based-convolution-for-unlearnable-audio-datasets-2401.02135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/poscuda-position-based-convolution-for-unlearnable-audio-datasets-2401.02135"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-residual-neural-network-for-musical-shape-evaluation-in-piano-performance-assessment-2401.02566</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-residual-neural-network-for-musical-shape-evaluation-in-piano-performance-assessment-2401.02566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-residual-neural-network-for-musical-shape-evaluation-in-piano-performance-assessment-2401.02566"/></url>
<url><loc>https://scifaro.com/en/abs/towards-weakly-supervised-text-to-audio-grounding-2401.02584</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-weakly-supervised-text-to-audio-grounding-2401.02584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-weakly-supervised-text-to-audio-grounding-2401.02584"/></url>
<url><loc>https://scifaro.com/en/abs/gradient-weighting-for-speaker-verification-in-extremely-low-signal-to-noise-ratio-2401.02626</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gradient-weighting-for-speaker-verification-in-extremely-low-signal-to-noise-ratio-2401.02626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gradient-weighting-for-speaker-verification-in-extremely-low-signal-to-noise-ratio-2401.02626"/></url>
<url><loc>https://scifaro.com/en/abs/musicaog-an-energy-based-model-for-learning-and-sampling-a-hierarchical-representation-of-symbolic-music-2401.02678</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicaog-an-energy-based-model-for-learning-and-sampling-a-hierarchical-representation-of-symbolic-music-2401.02678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicaog-an-energy-based-model-for-learning-and-sampling-a-hierarchical-representation-of-symbolic-music-2401.02678"/></url>
<url><loc>https://scifaro.com/en/abs/an-ai-enabled-bias-free-respiratory-disease-diagnosis-model-using-cough-audio-a-case-study-for-covid-19-2401.02996</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ai-enabled-bias-free-respiratory-disease-diagnosis-model-using-cough-audio-a-case-study-for-covid-19-2401.02996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ai-enabled-bias-free-respiratory-disease-diagnosis-model-using-cough-audio-a-case-study-for-covid-19-2401.02996"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-modalities-knowledge-distillation-and-masked-training-for-translating-multi-modal-emotion-recognition-to-uni-modal-speech-only-emotion-recognition-2401.03000</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-modalities-knowledge-distillation-and-masked-training-for-translating-multi-modal-emotion-recognition-to-uni-modal-speech-only-emotion-recognition-2401.03000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-modalities-knowledge-distillation-and-masked-training-for-translating-multi-modal-emotion-recognition-to-uni-modal-speech-only-emotion-recognition-2401.03000"/></url>
<url><loc>https://scifaro.com/en/abs/mlca-avsr-multi-layer-cross-attention-fusion-based-audio-visual-speech-recognition-2401.03424</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mlca-avsr-multi-layer-cross-attention-fusion-based-audio-visual-speech-recognition-2401.03424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mlca-avsr-multi-layer-cross-attention-fusion-based-audio-visual-speech-recognition-2401.03424"/></url>
<url><loc>https://scifaro.com/en/abs/icmc-asr-the-icassp-2024-in-car-multi-channel-automatic-speech-recognition-challenge-2401.03473</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icmc-asr-the-icassp-2024-in-car-multi-channel-automatic-speech-recognition-challenge-2401.03473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icmc-asr-the-icassp-2024-in-car-multi-channel-automatic-speech-recognition-challenge-2401.03473"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-quality-based-multi-strategy-approach-for-target-speaker-extraction-in-the-misp-2023-challenge-2401.03697</loc><lastmod>2024-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-quality-based-multi-strategy-approach-for-target-speaker-extraction-in-the-misp-2023-challenge-2401.03697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-quality-based-multi-strategy-approach-for-target-speaker-extraction-in-the-misp-2023-challenge-2401.03697"/></url>
<url><loc>https://scifaro.com/en/abs/djcm-a-deep-joint-cascade-model-for-singing-voice-separation-and-vocal-pitch-estimation-2401.03856</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/djcm-a-deep-joint-cascade-model-for-singing-voice-separation-and-vocal-pitch-estimation-2401.03856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/djcm-a-deep-joint-cascade-model-for-singing-voice-separation-and-vocal-pitch-estimation-2401.03856"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-encoding-network-for-multi-talker-speech-recognition-2401.04152</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-encoding-network-for-multi-talker-speech-recognition-2401.04152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-encoding-network-for-multi-talker-speech-recognition-2401.04152"/></url>
<url><loc>https://scifaro.com/en/abs/rad-net-a-repairing-and-denoising-network-for-speech-signal-improvement-2401.04389</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rad-net-a-repairing-and-denoising-network-for-speech-signal-improvement-2401.04389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rad-net-a-repairing-and-denoising-network-for-speech-signal-improvement-2401.04389"/></url>
<url><loc>https://scifaro.com/en/abs/hyperganstrument-instrument-sound-synthesis-and-editing-with-pitch-invariant-hypernetworks-2401.04558</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperganstrument-instrument-sound-synthesis-and-editing-with-pitch-invariant-hypernetworks-2401.04558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperganstrument-instrument-sound-synthesis-and-editing-with-pitch-invariant-hypernetworks-2401.04558"/></url>
<url><loc>https://scifaro.com/en/abs/masked-audio-generation-using-a-single-non-autoregressive-transformer-2401.04577</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-audio-generation-using-a-single-non-autoregressive-transformer-2401.04577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-audio-generation-using-a-single-non-autoregressive-transformer-2401.04577"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-a-comparative-analysis-of-cnn-and-xgboost-approaches-with-mel-frequency-cepstral-coefficients-and-mel-spectrograms-2401.04737</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-a-comparative-analysis-of-cnn-and-xgboost-approaches-with-mel-frequency-cepstral-coefficients-and-mel-spectrograms-2401.04737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-a-comparative-analysis-of-cnn-and-xgboost-approaches-with-mel-frequency-cepstral-coefficients-and-mel-spectrograms-2401.04737"/></url>
<url><loc>https://scifaro.com/en/abs/diffsheg-a-diffusion-based-approach-for-real-time-speech-driven-holistic-3d-expression-and-gesture-generation-2401.04747</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsheg-a-diffusion-based-approach-for-real-time-speech-driven-holistic-3d-expression-and-gesture-generation-2401.04747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsheg-a-diffusion-based-approach-for-real-time-speech-driven-holistic-3d-expression-and-gesture-generation-2401.04747"/></url>
<url><loc>https://scifaro.com/en/abs/mutox-universal-multilingual-audio-based-toxicity-dataset-and-zero-shot-detector-2401.05060</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mutox-universal-multilingual-audio-based-toxicity-dataset-and-zero-shot-detector-2401.05060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mutox-universal-multilingual-audio-based-toxicity-dataset-and-zero-shot-detector-2401.05060"/></url>
<url><loc>https://scifaro.com/en/abs/singer-identity-representation-learning-using-self-supervised-techniques-2401.05064</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singer-identity-representation-learning-using-self-supervised-techniques-2401.05064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singer-identity-representation-learning-using-self-supervised-techniques-2401.05064"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-zero-shot-text-to-speech-synthesis-conditioned-on-self-supervised-speech-representation-model-with-adapters-2401.05111</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-zero-shot-text-to-speech-synthesis-conditioned-on-self-supervised-speech-representation-model-with-adapters-2401.05111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-zero-shot-text-to-speech-synthesis-conditioned-on-self-supervised-speech-representation-model-with-adapters-2401.05111"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-and-hybrid-features-for-replay-and-deep-fake-audio-detection-2401.05614</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-and-hybrid-features-for-replay-and-deep-fake-audio-detection-2401.05614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-and-hybrid-features-for-replay-and-deep-fake-audio-detection-2401.05614"/></url>
<url><loc>https://scifaro.com/en/abs/intuitive-control-of-scraping-and-rubbing-through-audio-tactile-synthesis-2401.05757</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intuitive-control-of-scraping-and-rubbing-through-audio-tactile-synthesis-2401.05757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intuitive-control-of-scraping-and-rubbing-through-audio-tactile-synthesis-2401.05757"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-loss-based-frame-wise-feature-disentanglement-for-polyphonic-sound-event-detection-2401.05850</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-loss-based-frame-wise-feature-disentanglement-for-polyphonic-sound-event-detection-2401.05850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-loss-based-frame-wise-feature-disentanglement-for-polyphonic-sound-event-detection-2401.05850"/></url>
<url><loc>https://scifaro.com/en/abs/lcb-net-long-context-biasing-for-audio-visual-speech-recognition-2401.06390</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lcb-net-long-context-biasing-for-audio-visual-speech-recognition-2401.06390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lcb-net-long-context-biasing-for-audio-visual-speech-recognition-2401.06390"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-conversion-mitigating-device-variability-in-sound-event-classification-2401.06913</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-conversion-mitigating-device-variability-in-sound-event-classification-2401.06913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-conversion-mitigating-device-variability-in-sound-event-classification-2401.06913"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-midivae-fusing-track-and-bar-view-representations-for-long-multi-track-symbolic-music-generation-2401.07532</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-midivae-fusing-track-and-bar-view-representations-for-long-multi-track-symbolic-music-generation-2401.07532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-midivae-fusing-track-and-bar-view-representations-for-long-multi-track-symbolic-music-generation-2401.07532"/></url>
<url><loc>https://scifaro.com/en/abs/decoupled-spatial-and-temporal-processing-for-resource-efficient-multichannel-speech-enhancement-2401.07879</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupled-spatial-and-temporal-processing-for-resource-efficient-multichannel-speech-enhancement-2401.07879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupled-spatial-and-temporal-processing-for-resource-efficient-multichannel-speech-enhancement-2401.07879"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-importance-of-neural-wiener-filter-for-resource-efficient-multichannel-speech-enhancement-2401.07882</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-importance-of-neural-wiener-filter-for-resource-efficient-multichannel-speech-enhancement-2401.07882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-importance-of-neural-wiener-filter-for-resource-efficient-multichannel-speech-enhancement-2401.07882"/></url>
<url><loc>https://scifaro.com/en/abs/mcmchaos-improvising-rap-music-with-mcmc-methods-and-chaos-theory-2401.07967</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mcmchaos-improvising-rap-music-with-mcmc-methods-and-chaos-theory-2401.07967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mcmchaos-improvising-rap-music-with-mcmc-methods-and-chaos-theory-2401.07967"/></url>
<url><loc>https://scifaro.com/en/abs/durflex-evc-duration-flexible-emotional-voice-conversion-leveraging-discrete-representations-without-text-alignment-2401.08095</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/durflex-evc-duration-flexible-emotional-voice-conversion-leveraging-discrete-representations-without-text-alignment-2401.08095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/durflex-evc-duration-flexible-emotional-voice-conversion-leveraging-discrete-representations-without-text-alignment-2401.08095"/></url>
<url><loc>https://scifaro.com/en/abs/learning-disentangled-speech-representations-with-contrastive-learning-and-time-invariant-retrieval-2401.08096</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-disentangled-speech-representations-with-contrastive-learning-and-time-invariant-retrieval-2401.08096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-disentangled-speech-representations-with-contrastive-learning-and-time-invariant-retrieval-2401.08096"/></url>
<url><loc>https://scifaro.com/en/abs/diffrent-a-diffusion-model-for-recording-environment-transfer-of-speech-2401.08102</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffrent-a-diffusion-model-for-recording-environment-transfer-of-speech-2401.08102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffrent-a-diffusion-model-for-recording-environment-transfer-of-speech-2401.08102"/></url>
<url><loc>https://scifaro.com/en/abs/livescaler-live-control-of-the-harmony-of-an-electronic-music-track-2401.08181</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/livescaler-live-control-of-the-harmony-of-an-electronic-music-track-2401.08181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/livescaler-live-control-of-the-harmony-of-an-electronic-music-track-2401.08181"/></url>
<url><loc>https://scifaro.com/en/abs/from-coarse-to-fine-efficient-training-for-audio-spectrogram-transformers-2401.08415</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-coarse-to-fine-efficient-training-for-audio-spectrogram-transformers-2401.08415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-coarse-to-fine-efficient-training-for-audio-spectrogram-transformers-2401.08415"/></url>
<url><loc>https://scifaro.com/en/abs/robust-doa-estimation-using-deep-acoustic-imaging-2401.08717</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-doa-estimation-using-deep-acoustic-imaging-2401.08717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-doa-estimation-using-deep-acoustic-imaging-2401.08717"/></url>
<url><loc>https://scifaro.com/en/abs/notsofar-1-challenge-new-datasets-baseline-and-tasks-for-distant-meeting-transcription-2401.08887</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/notsofar-1-challenge-new-datasets-baseline-and-tasks-for-distant-meeting-transcription-2401.08887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/notsofar-1-challenge-new-datasets-baseline-and-tasks-for-distant-meeting-transcription-2401.08887"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effect-of-data-augmentation-on-local-embedding-properties-in-the-contrastive-learning-of-music-audio-representations-2401.08889</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effect-of-data-augmentation-on-local-embedding-properties-in-the-contrastive-learning-of-music-audio-representations-2401.08889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effect-of-data-augmentation-on-local-embedding-properties-in-the-contrastive-learning-of-music-audio-representations-2401.08889"/></url>
<url><loc>https://scifaro.com/en/abs/tempo-estimation-as-fully-self-supervised-binary-classification-2401.08891</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tempo-estimation-as-fully-self-supervised-binary-classification-2401.08891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tempo-estimation-as-fully-self-supervised-binary-classification-2401.08891"/></url>
<url><loc>https://scifaro.com/en/abs/similar-but-faster-manipulation-of-tempo-in-music-audio-embeddings-for-tempo-prediction-and-search-2401.08902</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/similar-but-faster-manipulation-of-tempo-in-music-audio-embeddings-for-tempo-prediction-and-search-2401.08902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/similar-but-faster-manipulation-of-tempo-in-music-audio-embeddings-for-tempo-prediction-and-search-2401.08902"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-lyrics-alignment-system-using-chroma-and-phonetic-features-for-classical-vocal-performance-2401.09200</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-lyrics-alignment-system-using-chroma-and-phonetic-features-for-classical-vocal-performance-2401.09200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-lyrics-alignment-system-using-chroma-and-phonetic-features-for-classical-vocal-performance-2401.09200"/></url>
<url><loc>https://scifaro.com/en/abs/t-foley-a-controllable-waveform-domain-diffusion-model-for-temporal-event-guided-foley-sound-synthesis-2401.09294</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-foley-a-controllable-waveform-domain-diffusion-model-for-temporal-event-guided-foley-sound-synthesis-2401.09294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-foley-a-controllable-waveform-domain-diffusion-model-for-temporal-event-guided-foley-sound-synthesis-2401.09294"/></url>
<url><loc>https://scifaro.com/en/abs/voxceleb-esp-preliminary-experiments-detecting-spanish-celebrities-from-their-voices-2401.09441</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxceleb-esp-preliminary-experiments-detecting-spanish-celebrities-from-their-voices-2401.09441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxceleb-esp-preliminary-experiments-detecting-spanish-celebrities-from-their-voices-2401.09441"/></url>
<url><loc>https://scifaro.com/en/abs/mlaad-the-multi-language-audio-anti-spoofing-dataset-2401.09512</loc><lastmod>2026-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mlaad-the-multi-language-audio-anti-spoofing-dataset-2401.09512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mlaad-the-multi-language-audio-anti-spoofing-dataset-2401.09512"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-independent-speech-emotion-recognition-using-dynamic-joint-distribution-adaptation-2401.09752</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-independent-speech-emotion-recognition-using-dynamic-joint-distribution-adaptation-2401.09752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-independent-speech-emotion-recognition-using-dynamic-joint-distribution-adaptation-2401.09752"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-recurrent-neural-network-for-automatic-behavior-laying-hen-recognition-2401.09880</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-recurrent-neural-network-for-automatic-behavior-laying-hen-recognition-2401.09880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-recurrent-neural-network-for-automatic-behavior-laying-hen-recognition-2401.09880"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-lightweight-neural-differential-dsp-vocoder-for-high-quality-speech-synthesis-2401.10460</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-lightweight-neural-differential-dsp-vocoder-for-high-quality-speech-synthesis-2401.10460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-lightweight-neural-differential-dsp-vocoder-for-high-quality-speech-synthesis-2401.10460"/></url>
<url><loc>https://scifaro.com/en/abs/aat-adapting-audio-transformer-for-various-acoustics-recognition-tasks-2401.10544</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aat-adapting-audio-transformer-for-various-acoustics-recognition-tasks-2401.10544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aat-adapting-audio-transformer-for-various-acoustics-recognition-tasks-2401.10544"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-sentiment-analysis-with-missing-modality-a-knowledge-transfer-approach-2401.10747</loc><lastmod>2026-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-sentiment-analysis-with-missing-modality-a-knowledge-transfer-approach-2401.10747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-sentiment-analysis-with-missing-modality-a-knowledge-transfer-approach-2401.10747"/></url>
<url><loc>https://scifaro.com/en/abs/asm-audio-spectrogram-mixer-2401.11102</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asm-audio-spectrogram-mixer-2401.11102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asm-audio-spectrogram-mixer-2401.11102"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-protection-for-privacy-in-offloaded-speech-understanding-2401.11983</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-protection-for-privacy-in-offloaded-speech-understanding-2401.11983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-protection-for-privacy-in-offloaded-speech-understanding-2401.11983"/></url>
<url><loc>https://scifaro.com/en/abs/resource-constrained-stereo-singing-voice-cancellation-2401.12068</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-constrained-stereo-singing-voice-cancellation-2401.12068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-constrained-stereo-singing-voice-cancellation-2401.12068"/></url>
<url><loc>https://scifaro.com/en/abs/ditto-diffusion-inference-time-t-optimization-for-music-generation-2401.12179</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditto-diffusion-inference-time-t-optimization-for-music-generation-2401.12179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditto-diffusion-inference-time-t-optimization-for-music-generation-2401.12179"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploratory-study-of-multimodal-physiological-data-in-jazz-improvisation-using-basic-machine-learning-techniques-2401.12266</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploratory-study-of-multimodal-physiological-data-in-jazz-improvisation-using-basic-machine-learning-techniques-2401.12266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploratory-study-of-multimodal-physiological-data-in-jazz-improvisation-using-basic-machine-learning-techniques-2401.12266"/></url>
<url><loc>https://scifaro.com/en/abs/eend-m2f-masked-attention-mask-transformers-for-speaker-diarization-2401.12600</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eend-m2f-masked-attention-mask-transformers-for-speaker-diarization-2401.12600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eend-m2f-masked-attention-mask-transformers-for-speaker-diarization-2401.12600"/></url>
<url><loc>https://scifaro.com/en/abs/moodloopgp-generating-emotion-conditioned-loop-tablature-music-with-multi-granular-features-2401.12656</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moodloopgp-generating-emotion-conditioned-loop-tablature-music-with-multi-granular-features-2401.12656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moodloopgp-generating-emotion-conditioned-loop-tablature-music-with-multi-granular-features-2401.12656"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-aware-contrastive-adaptation-network-for-source-free-cross-corpus-speech-emotion-recognition-2401.12925</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-aware-contrastive-adaptation-network-for-source-free-cross-corpus-speech-emotion-recognition-2401.12925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-aware-contrastive-adaptation-network-for-source-free-cross-corpus-speech-emotion-recognition-2401.12925"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-acoustic-guitar-sound-synthesis-with-an-instrument-specific-input-representation-and-diffusion-outpainting-2401.13498</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-acoustic-guitar-sound-synthesis-with-an-instrument-specific-input-representation-and-diffusion-outpainting-2401.13498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-acoustic-guitar-sound-synthesis-with-an-instrument-specific-input-representation-and-diffusion-outpainting-2401.13498"/></url>
<url><loc>https://scifaro.com/en/abs/a-phoneme-scale-assessment-of-multichannel-speech-enhancement-algorithms-2401.13548</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-phoneme-scale-assessment-of-multichannel-speech-enhancement-algorithms-2401.13548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-phoneme-scale-assessment-of-multichannel-speech-enhancement-algorithms-2401.13548"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-impaired-users-using-intermediate-asr-features-and-human-memory-models-2401.13611</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-impaired-users-using-intermediate-asr-features-and-human-memory-models-2401.13611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-impaired-users-using-intermediate-asr-features-and-human-memory-models-2401.13611"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-nvidia-s-multi-speaker-multi-lingual-tts-systems-with-zero-shot-tts-to-indic-languages-2401.13851</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-nvidia-s-multi-speaker-multi-lingual-tts-systems-with-zero-shot-tts-to-indic-languages-2401.13851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-nvidia-s-multi-speaker-multi-lingual-tts-systems-with-zero-shot-tts-to-indic-languages-2401.13851"/></url>
<url><loc>https://scifaro.com/en/abs/tdfnet-an-efficient-audio-visual-speech-separation-model-with-top-down-fusion-2401.14185</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tdfnet-an-efficient-audio-visual-speech-separation-model-with-top-down-fusion-2401.14185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tdfnet-an-efficient-audio-visual-speech-separation-model-with-top-down-fusion-2401.14185"/></url>
<url><loc>https://scifaro.com/en/abs/speech-foundation-models-on-intelligibility-prediction-for-hearing-impaired-listeners-2401.14289</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-foundation-models-on-intelligibility-prediction-for-hearing-impaired-listeners-2401.14289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-foundation-models-on-intelligibility-prediction-for-hearing-impaired-listeners-2401.14289"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2024-speech-signal-improvement-challenge-2401.14444</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2024-speech-signal-improvement-challenge-2401.14444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2024-speech-signal-improvement-challenge-2401.14444"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-musical-roots-applying-audio-embeddings-to-empower-influence-attribution-for-a-generative-music-model-2401.14542</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-musical-roots-applying-audio-embeddings-to-empower-influence-attribution-for-a-generative-music-model-2401.14542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-musical-roots-applying-audio-embeddings-to-empower-influence-attribution-for-a-generative-music-model-2401.14542"/></url>
<url><loc>https://scifaro.com/en/abs/unit-dsr-dysarthric-speech-reconstruction-system-using-speech-unit-normalization-2401.14664</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unit-dsr-dysarthric-speech-reconstruction-system-using-speech-unit-normalization-2401.14664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unit-dsr-dysarthric-speech-reconstruction-system-using-speech-unit-normalization-2401.14664"/></url>
<url><loc>https://scifaro.com/en/abs/expressivity-aware-music-performance-retrieval-using-mid-level-perceptual-features-and-emotion-word-embeddings-2401.14826</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressivity-aware-music-performance-retrieval-using-mid-level-perceptual-features-and-emotion-word-embeddings-2401.14826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressivity-aware-music-performance-retrieval-using-mid-level-perceptual-features-and-emotion-word-embeddings-2401.14826"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-parameters-of-vowel-sounds-of-russian-and-english-languages-2401.14890</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-parameters-of-vowel-sounds-of-russian-and-english-languages-2401.14890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-parameters-of-vowel-sounds-of-russian-and-english-languages-2401.14890"/></url>
<url><loc>https://scifaro.com/en/abs/amuse-adaptive-multimodal-analysis-for-speaker-emotion-recognition-in-group-conversations-2401.15164</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amuse-adaptive-multimodal-analysis-for-speaker-emotion-recognition-in-group-conversations-2401.15164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amuse-adaptive-multimodal-analysis-for-speaker-emotion-recognition-in-group-conversations-2401.15164"/></url>
<url><loc>https://scifaro.com/en/abs/music-auto-tagging-with-robust-music-representation-learned-via-domain-adversarial-training-2401.15323</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-auto-tagging-with-robust-music-representation-learned-via-domain-adversarial-training-2401.15323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-auto-tagging-with-robust-music-representation-learned-via-domain-adversarial-training-2401.15323"/></url>
<url><loc>https://scifaro.com/en/abs/masked-audio-modeling-with-clap-and-multi-objective-learning-2401.15953</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-audio-modeling-with-clap-and-multi-objective-learning-2401.15953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-audio-modeling-with-clap-and-multi-objective-learning-2401.15953"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-target-speech-extraction-enhancing-personalized-diarization-and-extraction-on-complex-recordings-2401.15993</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-target-speech-extraction-enhancing-personalized-diarization-and-extraction-on-complex-recordings-2401.15993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-target-speech-extraction-enhancing-personalized-diarization-and-extraction-on-complex-recordings-2401.15993"/></url>
<url><loc>https://scifaro.com/en/abs/pbscr-the-piano-bootleg-score-composer-recognition-dataset-2401.16803</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pbscr-the-piano-bootleg-score-composer-recognition-dataset-2401.16803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pbscr-the-piano-bootleg-score-composer-recognition-dataset-2401.16803"/></url>
<url><loc>https://scifaro.com/en/abs/speechbertscore-reference-aware-automatic-evaluation-of-speech-generation-leveraging-nlp-evaluation-metrics-2401.16812</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechbertscore-reference-aware-automatic-evaluation-of-speech-generation-leveraging-nlp-evaluation-metrics-2401.16812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechbertscore-reference-aware-automatic-evaluation-of-speech-generation-leveraging-nlp-evaluation-metrics-2401.16812"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-sound-event-localization-and-detection-in-real-360-degree-audio-visual-soundscapes-2401.17129</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-sound-event-localization-and-detection-in-real-360-degree-audio-visual-soundscapes-2401.17129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-sound-event-localization-and-detection-in-real-360-degree-audio-visual-soundscapes-2401.17129"/></url>
<url><loc>https://scifaro.com/en/abs/songbsab-a-dual-prevention-approach-against-singing-voice-conversion-based-illegal-song-covers-2401.17133</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songbsab-a-dual-prevention-approach-against-singing-voice-conversion-based-illegal-song-covers-2401.17133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songbsab-a-dual-prevention-approach-against-singing-voice-conversion-based-illegal-song-covers-2401.17133"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-spk-full-pipeline-speaker-embedding-toolkit-with-reproducible-recipes-self-supervised-front-ends-and-off-the-shelf-models-2401.17230</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-spk-full-pipeline-speaker-embedding-toolkit-with-reproducible-recipes-self-supervised-front-ends-and-off-the-shelf-models-2401.17230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-spk-full-pipeline-speaker-embedding-toolkit-with-reproducible-recipes-self-supervised-front-ends-and-off-the-shelf-models-2401.17230"/></url>
<url><loc>https://scifaro.com/en/abs/proactive-detection-of-voice-cloning-with-localized-watermarking-2401.17264</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proactive-detection-of-voice-cloning-with-localized-watermarking-2401.17264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proactive-detection-of-voice-cloning-with-localized-watermarking-2401.17264"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-data-scaling-up-an-introduction-to-ace-opencpop-and-ace-kising-2401.17619</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-data-scaling-up-an-introduction-to-ace-opencpop-and-ace-kising-2401.17619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-data-scaling-up-an-introduction-to-ace-opencpop-and-ace-kising-2401.17619"/></url>
<url><loc>https://scifaro.com/en/abs/harnessing-smartwatch-microphone-sensors-for-cough-detection-and-classification-2401.17738</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harnessing-smartwatch-microphone-sensors-for-cough-detection-and-classification-2401.17738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harnessing-smartwatch-microphone-sensors-for-cough-detection-and-classification-2401.17738"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-audio-visual-features-with-pretrained-av-hubert-for-multi-modal-dysarthric-speech-reconstruction-2401.17796</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-audio-visual-features-with-pretrained-av-hubert-for-multi-modal-dysarthric-speech-reconstruction-2401.17796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-audio-visual-features-with-pretrained-av-hubert-for-multi-modal-dysarthric-speech-reconstruction-2401.17796"/></url>
<url><loc>https://scifaro.com/en/abs/dance-to-music-generation-with-encoder-based-textual-inversion-2401.17800</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dance-to-music-generation-with-encoder-based-textual-inversion-2401.17800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dance-to-music-generation-with-encoder-based-textual-inversion-2401.17800"/></url>
<url><loc>https://scifaro.com/en/abs/can-you-remove-the-downstream-model-for-speaker-recognition-with-self-supervised-speech-features-2402.00340</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-you-remove-the-downstream-model-for-speaker-recognition-with-self-supervised-speech-features-2402.00340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-you-remove-the-downstream-model-for-speaker-recognition-with-self-supervised-speech-features-2402.00340"/></url>
<url><loc>https://scifaro.com/en/abs/baton-aligning-text-to-audio-model-with-human-preference-feedback-2402.00744</loc><lastmod>2024-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/baton-aligning-text-to-audio-model-with-human-preference-feedback-2402.00744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/baton-aligning-text-to-audio-model-with-human-preference-feedback-2402.00744"/></url>
<url><loc>https://scifaro.com/en/abs/eva-gan-enhanced-various-audio-generation-via-scalable-generative-adversarial-networks-2402.00892</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eva-gan-enhanced-various-audio-generation-via-scalable-generative-adversarial-networks-2402.00892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eva-gan-enhanced-various-audio-generation-via-scalable-generative-adversarial-networks-2402.00892"/></url>
<url><loc>https://scifaro.com/en/abs/screening-method-for-early-dementia-using-sound-objects-as-voice-biomarkers-2402.00897</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/screening-method-for-early-dementia-using-sound-objects-as-voice-biomarkers-2402.00897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/screening-method-for-early-dementia-using-sound-objects-as-voice-biomarkers-2402.00897"/></url>
<url><loc>https://scifaro.com/en/abs/staa-net-a-sparse-and-transferable-adversarial-attack-for-speech-emotion-recognition-2402.01227</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/staa-net-a-sparse-and-transferable-adversarial-attack-for-speech-emotion-recognition-2402.01227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/staa-net-a-sparse-and-transferable-adversarial-attack-for-speech-emotion-recognition-2402.01227"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-transferability-of-large-scale-self-supervision-to-few-shot-audio-classification-2402.01274</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-transferability-of-large-scale-self-supervision-to-few-shot-audio-classification-2402.01274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-transferability-of-large-scale-self-supervision-to-few-shot-audio-classification-2402.01274"/></url>
<url><loc>https://scifaro.com/en/abs/bass-accompaniment-generation-via-latent-diffusion-2402.01412</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bass-accompaniment-generation-via-latent-diffusion-2402.01412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bass-accompaniment-generation-via-latent-diffusion-2402.01412"/></url>
<url><loc>https://scifaro.com/en/abs/objective-and-subjective-evaluation-of-speech-enhancement-methods-in-the-udase-task-of-the-7th-chime-challenge-2402.01413</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-and-subjective-evaluation-of-speech-enhancement-methods-in-the-udase-task-of-the-7th-chime-challenge-2402.01413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-and-subjective-evaluation-of-speech-enhancement-methods-in-the-udase-task-of-the-7th-chime-challenge-2402.01413"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-analysis-of-robust-automatic-piano-transcription-2402.01424</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-analysis-of-robust-automatic-piano-transcription-2402.01424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-analysis-of-robust-automatic-piano-transcription-2402.01424"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-cross-domain-singing-voice-synthesis-via-reduced-self-supervised-speech-representations-2402.01520</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-cross-domain-singing-voice-synthesis-via-reduced-self-supervised-speech-representations-2402.01520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-cross-domain-singing-voice-synthesis-via-reduced-self-supervised-speech-representations-2402.01520"/></url>
<url><loc>https://scifaro.com/en/abs/spiking-music-audio-compression-with-event-based-auto-encoders-2402.01571</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spiking-music-audio-compression-with-event-based-auto-encoders-2402.01571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spiking-music-audio-compression-with-event-based-auto-encoders-2402.01571"/></url>
<url><loc>https://scifaro.com/en/abs/specdiff-gan-a-spectrally-shaped-noise-diffusion-gan-for-speech-and-music-synthesis-2402.01753</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specdiff-gan-a-spectrally-shaped-noise-diffusion-gan-for-speech-and-music-synthesis-2402.01753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specdiff-gan-a-spectrally-shaped-noise-diffusion-gan-for-speech-and-music-synthesis-2402.01753"/></url>
<url><loc>https://scifaro.com/en/abs/creating-a-synthesizer-from-schr-odinger-s-equation-2402.01773</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-a-synthesizer-from-schr-odinger-s-equation-2402.01773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-a-synthesizer-from-schr-odinger-s-equation-2402.01773"/></url>
<url><loc>https://scifaro.com/en/abs/ks-net-multi-band-joint-speech-restoration-and-enhancement-network-for-2024-icassp-ssi-challenge-2402.01808</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ks-net-multi-band-joint-speech-restoration-and-enhancement-network-for-2024-icassp-ssi-challenge-2402.01808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ks-net-multi-band-joint-speech-restoration-and-enhancement-network-for-2024-icassp-ssi-challenge-2402.01808"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-cognitive-decline-from-spoken-language-through-feature-selection-and-the-bag-of-acoustic-words-model-2402.01824</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-cognitive-decline-from-spoken-language-through-feature-selection-and-the-bag-of-acoustic-words-model-2402.01824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-cognitive-decline-from-spoken-language-through-feature-selection-and-the-bag-of-acoustic-words-model-2402.01824"/></url>
<url><loc>https://scifaro.com/en/abs/audio-flamingo-a-novel-audio-language-model-with-few-shot-learning-and-dialogue-abilities-2402.01831</loc><lastmod>2024-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-flamingo-a-novel-audio-language-model-with-few-shot-learning-and-dialogue-abilities-2402.01831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-flamingo-a-novel-audio-language-model-with-few-shot-learning-and-dialogue-abilities-2402.01831"/></url>
<url><loc>https://scifaro.com/en/abs/natural-language-guidance-of-high-fidelity-text-to-speech-with-synthetic-annotations-2402.01912</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/natural-language-guidance-of-high-fidelity-text-to-speech-with-synthetic-annotations-2402.01912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/natural-language-guidance-of-high-fidelity-text-to-speech-with-synthetic-annotations-2402.01912"/></url>
<url><loc>https://scifaro.com/en/abs/sentiment-analysis-in-non-fixed-length-audios-using-a-fully-convolutional-neural-network-2402.02184</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sentiment-analysis-in-non-fixed-length-audios-using-a-fully-convolutional-neural-network-2402.02184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sentiment-analysis-in-non-fixed-length-audios-using-a-fully-convolutional-neural-network-2402.02184"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-data-augmentation-for-robust-speaker-verification-2402.02699</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-data-augmentation-for-robust-speaker-verification-2402.02699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-data-augmentation-for-robust-speaker-verification-2402.02699"/></url>
<url><loc>https://scifaro.com/en/abs/how-phonemes-contribute-to-deep-speaker-models-2402.02730</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-phonemes-contribute-to-deep-speaker-models-2402.02730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-phonemes-contribute-to-deep-speaker-models-2402.02730"/></url>
<url><loc>https://scifaro.com/en/abs/focal-modulation-networks-for-interpretable-sound-classification-2402.02754</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/focal-modulation-networks-for-interpretable-sound-classification-2402.02754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/focal-modulation-networks-for-interpretable-sound-classification-2402.02754"/></url>
<url><loc>https://scifaro.com/en/abs/dual-knowledge-distillation-for-efficient-sound-event-detection-2402.02781</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-knowledge-distillation-for-efficient-sound-event-detection-2402.02781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-knowledge-distillation-for-efficient-sound-event-detection-2402.02781"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-federated-self-supervised-learning-for-general-purpose-audio-understanding-2402.02889</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-federated-self-supervised-learning-for-general-purpose-audio-understanding-2402.02889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-federated-self-supervised-learning-for-general-purpose-audio-understanding-2402.02889"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-study-of-the-current-state-of-the-art-in-nepali-automatic-speech-recognition-systems-2402.03050</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-study-of-the-current-state-of-the-art-in-nepali-automatic-speech-recognition-systems-2402.03050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-study-of-the-current-state-of-the-art-in-nepali-automatic-speech-recognition-systems-2402.03050"/></url>
<url><loc>https://scifaro.com/en/abs/ispa-inter-species-phonetic-alphabet-for-transcribing-animal-sounds-2402.03269</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ispa-inter-species-phonetic-alphabet-for-transcribing-animal-sounds-2402.03269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ispa-inter-species-phonetic-alphabet-for-transcribing-animal-sounds-2402.03269"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-sound-source-localization-using-a-hybrid-time-and-frequency-domain-model-2402.03867</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-sound-source-localization-using-a-hybrid-time-and-frequency-domain-model-2402.03867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-sound-source-localization-using-a-hybrid-time-and-frequency-domain-model-2402.03867"/></url>
<url><loc>https://scifaro.com/en/abs/bidirectional-autoregressive-diffusion-model-for-dance-generation-2402.04356</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bidirectional-autoregressive-diffusion-model-for-dance-generation-2402.04356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bidirectional-autoregressive-diffusion-model-for-dance-generation-2402.04356"/></url>
<url><loc>https://scifaro.com/en/abs/review-of-cetacean-s-click-detection-algorithms-2402.04735</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/review-of-cetacean-s-click-detection-algorithms-2402.04735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/review-of-cetacean-s-click-detection-algorithms-2402.04735"/></url>
<url><loc>https://scifaro.com/en/abs/fast-timing-conditioned-latent-audio-diffusion-2402.04825</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-timing-conditioned-latent-audio-diffusion-2402.04825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-timing-conditioned-latent-audio-diffusion-2402.04825"/></url>
<url><loc>https://scifaro.com/en/abs/multispecies-bird-sound-recognition-using-a-fully-convolutional-neural-network-2402.05489</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multispecies-bird-sound-recognition-using-a-fully-convolutional-neural-network-2402.05489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multispecies-bird-sound-recognition-using-a-fully-convolutional-neural-network-2402.05489"/></url>
<url><loc>https://scifaro.com/en/abs/listening-between-the-lines-synthetic-speech-detection-disregarding-verbal-content-2402.05567</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-between-the-lines-synthetic-speech-detection-disregarding-verbal-content-2402.05567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-between-the-lines-synthetic-speech-detection-disregarding-verbal-content-2402.05567"/></url>
<url><loc>https://scifaro.com/en/abs/musicmagus-zero-shot-text-to-music-editing-via-diffusion-models-2402.06178</loc><lastmod>2024-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicmagus-zero-shot-text-to-music-editing-via-diffusion-models-2402.06178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicmagus-zero-shot-text-to-music-editing-via-diffusion-models-2402.06178"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-approach-to-voice-authenticity-2402.06304</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-approach-to-voice-authenticity-2402.06304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-approach-to-voice-authenticity-2402.06304"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-spatial-diversity-for-increasing-the-robustness-of-sound-source-localization-systems-against-reverberation-2402.06411</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-spatial-diversity-for-increasing-the-robustness-of-sound-source-localization-systems-against-reverberation-2402.06411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-spatial-diversity-for-increasing-the-robustness-of-sound-source-localization-systems-against-reverberation-2402.06411"/></url>
<url><loc>https://scifaro.com/en/abs/analytical-model-for-the-relation-between-signal-bandwidth-and-spatial-resolution-in-steered-response-power-phase-transform-srp-phat-maps-2402.06586</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analytical-model-for-the-relation-between-signal-bandwidth-and-spatial-resolution-in-steered-response-power-phase-transform-srp-phat-maps-2402.06586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analytical-model-for-the-relation-between-signal-bandwidth-and-spatial-resolution-in-steered-response-power-phase-transform-srp-phat-maps-2402.06586"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-co-creativity-using-total-information-flow-2402.06810</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-co-creativity-using-total-information-flow-2402.06810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-co-creativity-using-total-information-flow-2402.06810"/></url>
<url><loc>https://scifaro.com/en/abs/speech-motion-anomaly-detection-via-cross-modal-translation-of-4d-motion-fields-from-tagged-mri-2402.06984</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-motion-anomaly-detection-via-cross-modal-translation-of-4d-motion-fields-from-tagged-mri-2402.06984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-motion-anomaly-detection-via-cross-modal-translation-of-4d-motion-fields-from-tagged-mri-2402.06984"/></url>
<url><loc>https://scifaro.com/en/abs/cacophony-an-improved-contrastive-audio-text-model-2402.06986</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cacophony-an-improved-contrastive-audio-text-model-2402.06986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cacophony-an-improved-contrastive-audio-text-model-2402.06986"/></url>
<url><loc>https://scifaro.com/en/abs/speech-rhythm-based-speaker-embeddings-extraction-from-phonemes-and-phoneme-duration-for-multi-speaker-speech-synthesis-2402.07085</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-rhythm-based-speaker-embeddings-extraction-from-phonemes-and-phoneme-duration-for-multi-speaker-speech-synthesis-2402.07085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-rhythm-based-speaker-embeddings-extraction-from-phonemes-and-phoneme-duration-for-multi-speaker-speech-synthesis-2402.07085"/></url>
<url><loc>https://scifaro.com/en/abs/mint-boosting-audio-language-model-via-multi-target-pre-training-and-instruction-tuning-2402.07485</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mint-boosting-audio-language-model-via-multi-target-pre-training-and-instruction-tuning-2402.07485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mint-boosting-audio-language-model-via-multi-target-pre-training-and-instruction-tuning-2402.07485"/></url>
<url><loc>https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-covid-19-from-crowd-sourced-respiratory-voice-data-2402.07619</loc><lastmod>2026-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-covid-19-from-crowd-sourced-respiratory-voice-data-2402.07619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-a-multi-variate-prediction-model-for-covid-19-from-crowd-sourced-respiratory-voice-data-2402.07619"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-pre-trained-autoencoders-for-interpretable-prototype-learning-of-music-audio-2402.09318</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-pre-trained-autoencoders-for-interpretable-prototype-learning-of-music-audio-2402.09318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-pre-trained-autoencoders-for-interpretable-prototype-learning-of-music-audio-2402.09318"/></url>
<url><loc>https://scifaro.com/en/abs/arrange-inpaint-and-refine-steerable-long-term-music-audio-generation-and-editing-via-content-based-controls-2402.09508</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arrange-inpaint-and-refine-steerable-long-term-music-audio-generation-and-editing-via-content-based-controls-2402.09508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arrange-inpaint-and-refine-steerable-long-term-music-audio-generation-and-editing-via-content-based-controls-2402.09508"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-for-contrastive-audio-language-models-2402.09585</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-for-contrastive-audio-language-models-2402.09585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-for-contrastive-audio-language-models-2402.09585"/></url>
<url><loc>https://scifaro.com/en/abs/a-cross-talk-robust-multichannel-vad-model-for-multiparty-agent-interactions-trained-using-synthetic-re-recordings-2402.09797</loc><lastmod>2024-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cross-talk-robust-multichannel-vad-model-for-multiparty-agent-interactions-trained-using-synthetic-re-recordings-2402.09797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cross-talk-robust-multichannel-vad-model-for-multiparty-agent-interactions-trained-using-synthetic-re-recordings-2402.09797"/></url>
<url><loc>https://scifaro.com/en/abs/muchin-a-chinese-colloquial-description-benchmark-for-evaluating-language-models-in-the-field-of-music-2402.09871</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muchin-a-chinese-colloquial-description-benchmark-for-evaluating-language-models-in-the-field-of-music-2402.09871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muchin-a-chinese-colloquial-description-benchmark-for-evaluating-language-models-in-the-field-of-music-2402.09871"/></url>
<url><loc>https://scifaro.com/en/abs/ml-aspa-a-contemplation-of-machine-learning-based-acoustic-signal-processing-analysis-for-sounds-strains-emerging-technology-2402.10005</loc><lastmod>2024-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ml-aspa-a-contemplation-of-machine-learning-based-acoustic-signal-processing-analysis-for-sounds-strains-emerging-technology-2402.10005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ml-aspa-a-contemplation-of-machine-learning-based-acoustic-signal-processing-analysis-for-sounds-strains-emerging-technology-2402.10005"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-unsupervised-and-text-based-audio-editing-using-ddpm-inversion-2402.10009</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-unsupervised-and-text-based-audio-editing-using-ddpm-inversion-2402.10009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-unsupervised-and-text-based-audio-editing-using-ddpm-inversion-2402.10009"/></url>
<url><loc>https://scifaro.com/en/abs/tuning-in-analysis-of-audio-classifier-performance-in-clinical-settings-with-limited-data-2402.10100</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tuning-in-analysis-of-audio-classifier-performance-in-clinical-settings-with-limited-data-2402.10100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tuning-in-analysis-of-audio-classifier-performance-in-clinical-settings-with-limited-data-2402.10100"/></url>
<url><loc>https://scifaro.com/en/abs/deepsrgm-sequence-classification-and-ranking-in-indian-classical-music-with-deep-learning-2402.10168</loc><lastmod>2024-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepsrgm-sequence-classification-and-ranking-in-indian-classical-music-with-deep-learning-2402.10168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepsrgm-sequence-classification-and-ranking-in-indian-classical-music-with-deep-learning-2402.10168"/></url>
<url><loc>https://scifaro.com/en/abs/antideepfake-ai-for-deep-fake-speech-recognition-2402.10218</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/antideepfake-ai-for-deep-fake-speech-recognition-2402.10218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/antideepfake-ai-for-deep-fake-speech-recognition-2402.10218"/></url>
<url><loc>https://scifaro.com/en/abs/engraving-oriented-joint-estimation-of-pitch-spelling-and-local-and-global-keys-2402.10247</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/engraving-oriented-joint-estimation-of-pitch-spelling-and-local-and-global-keys-2402.10247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/engraving-oriented-joint-estimation-of-pitch-spelling-and-local-and-global-keys-2402.10247"/></url>
<url><loc>https://scifaro.com/en/abs/apcodec-a-neural-audio-codec-with-parallel-amplitude-and-phase-spectrum-encoding-and-decoding-2402.10533</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apcodec-a-neural-audio-codec-with-parallel-amplitude-and-phase-spectrum-encoding-and-decoding-2402.10533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apcodec-a-neural-audio-codec-with-parallel-amplitude-and-phase-spectrum-encoding-and-decoding-2402.10533"/></url>
<url><loc>https://scifaro.com/en/abs/learning-disentangled-audio-representations-through-controlled-synthesis-2402.10547</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-disentangled-audio-representations-through-controlled-synthesis-2402.10547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-disentangled-audio-representations-through-controlled-synthesis-2402.10547"/></url>
<url><loc>https://scifaro.com/en/abs/low-power-snn-based-audio-source-localisation-using-a-hilbert-transform-spike-encoding-scheme-2402.11748</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-power-snn-based-audio-source-localisation-using-a-hilbert-transform-spike-encoding-scheme-2402.11748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-power-snn-based-audio-source-localisation-using-a-hilbert-transform-spike-encoding-scheme-2402.11748"/></url>
<url><loc>https://scifaro.com/en/abs/unraveling-complex-data-diversity-in-underwater-acoustic-target-recognition-through-convolution-based-mixture-of-experts-2402.11919</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unraveling-complex-data-diversity-in-underwater-acoustic-target-recognition-through-convolution-based-mixture-of-experts-2402.11919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unraveling-complex-data-diversity-in-underwater-acoustic-target-recognition-through-convolution-based-mixture-of-experts-2402.11919"/></url>
<url><loc>https://scifaro.com/en/abs/soft-weighted-crossentropy-loss-for-continous-alzheimer-s-disease-detection-2402.11931</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-weighted-crossentropy-loss-for-continous-alzheimer-s-disease-detection-2402.11931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-weighted-crossentropy-loss-for-continous-alzheimer-s-disease-detection-2402.11931"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-emotion-recognition-from-raw-audio-with-sinc-convolution-2402.11954</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-from-raw-audio-with-sinc-convolution-2402.11954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-from-raw-audio-with-sinc-convolution-2402.11954"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-semantic-latent-space-of-diffusion-based-text-to-speech-models-2402.12423</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-semantic-latent-space-of-diffusion-based-text-to-speech-models-2402.12423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-semantic-latent-space-of-diffusion-based-text-to-speech-models-2402.12423"/></url>
<url><loc>https://scifaro.com/en/abs/secp-a-speech-enhancement-based-curation-pipeline-for-scalable-acquisition-of-clean-speech-2402.12482</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/secp-a-speech-enhancement-based-curation-pipeline-for-scalable-acquisition-of-clean-speech-2402.12482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/secp-a-speech-enhancement-based-curation-pipeline-for-scalable-acquisition-of-clean-speech-2402.12482"/></url>
<url><loc>https://scifaro.com/en/abs/guiding-the-underwater-acoustic-target-recognition-with-interpretable-contrastive-learning-2402.12658</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guiding-the-underwater-acoustic-target-recognition-with-interpretable-contrastive-learning-2402.12658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guiding-the-underwater-acoustic-target-recognition-with-interpretable-contrastive-learning-2402.12658"/></url>
<url><loc>https://scifaro.com/en/abs/singvisio-visual-analytics-of-diffusion-model-for-singing-voice-conversion-2402.12660</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singvisio-visual-analytics-of-diffusion-model-for-singing-voice-conversion-2402.12660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singvisio-visual-analytics-of-diffusion-model-for-singing-voice-conversion-2402.12660"/></url>
<url><loc>https://scifaro.com/en/abs/breaking-down-power-barriers-in-on-device-streaming-asr-insights-and-solutions-2402.13076</loc><lastmod>2025-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breaking-down-power-barriers-in-on-device-streaming-asr-insights-and-solutions-2402.13076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breaking-down-power-barriers-in-on-device-streaming-asr-insights-and-solutions-2402.13076"/></url>
<url><loc>https://scifaro.com/en/abs/structure-informed-positional-encoding-for-music-generation-2402.13301</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structure-informed-positional-encoding-for-music-generation-2402.13301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structure-informed-positional-encoding-for-music-generation-2402.13301"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-batch-size-on-contrastive-self-supervised-speech-representation-learning-2402.13723</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-batch-size-on-contrastive-self-supervised-speech-representation-learning-2402.13723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-batch-size-on-contrastive-self-supervised-speech-representation-learning-2402.13723"/></url>
<url><loc>https://scifaro.com/en/abs/music-style-transfer-with-time-varying-inversion-of-diffusion-models-2402.13763</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-style-transfer-with-time-varying-inversion-of-diffusion-models-2402.13763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-style-transfer-with-time-varying-inversion-of-diffusion-models-2402.13763"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-audio-fingerprinting-accuracy-addressing-background-noise-and-distortion-challenges-2402.13957</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-audio-fingerprinting-accuracy-addressing-background-noise-and-distortion-challenges-2402.13957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-audio-fingerprinting-accuracy-addressing-background-noise-and-distortion-challenges-2402.13957"/></url>
<url><loc>https://scifaro.com/en/abs/compression-robust-synthetic-speech-detection-using-patched-spectrogram-transformer-2402.14205</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compression-robust-synthetic-speech-detection-using-patched-spectrogram-transformer-2402.14205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compression-robust-synthetic-speech-detection-using-patched-spectrogram-transformer-2402.14205"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-generation-with-non-differentiable-rule-guided-diffusion-2402.14285</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-generation-with-non-differentiable-rule-guided-diffusion-2402.14285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-generation-with-non-differentiable-rule-guided-diffusion-2402.14285"/></url>
<url><loc>https://scifaro.com/en/abs/human-brain-exhibits-distinct-patterns-when-listening-to-fake-versus-real-audio-preliminary-evidence-2402.14982</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-brain-exhibits-distinct-patterns-when-listening-to-fake-versus-real-audio-preliminary-evidence-2402.14982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-brain-exhibits-distinct-patterns-when-listening-to-fake-versus-real-audio-preliminary-evidence-2402.14982"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-music-generation-in-the-context-of-interaction-2402.15294</loc><lastmod>2024-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-music-generation-in-the-context-of-interaction-2402.15294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-music-generation-in-the-context-of-interaction-2402.15294"/></url>
<url><loc>https://scifaro.com/en/abs/gla-grad-a-griffin-lim-extended-waveform-generation-diffusion-model-2402.15516</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gla-grad-a-griffin-lim-extended-waveform-generation-diffusion-model-2402.15516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gla-grad-a-griffin-lim-extended-waveform-generation-diffusion-model-2402.15516"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-and-lexical-discovery-of-a-canine-language-using-hubert-2402.15985</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-and-lexical-discovery-of-a-canine-language-using-hubert-2402.15985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-and-lexical-discovery-of-a-canine-language-using-hubert-2402.15985"/></url>
<url><loc>https://scifaro.com/en/abs/chatmusician-understanding-and-generating-music-intrinsically-with-llm-2402.16153</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chatmusician-understanding-and-generating-music-intrinsically-with-llm-2402.16153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chatmusician-understanding-and-generating-music-intrinsically-with-llm-2402.16153"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speech-quality-estimation-and-enhancement-using-only-clean-speech-2402.16321</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speech-quality-estimation-and-enhancement-using-only-clean-speech-2402.16321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speech-quality-estimation-and-enhancement-using-only-clean-speech-2402.16321"/></url>
<url><loc>https://scifaro.com/en/abs/towards-environmental-preference-based-speech-enhancement-for-individualised-multi-modal-hearing-aids-2402.16757</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-environmental-preference-based-speech-enhancement-for-individualised-multi-modal-hearing-aids-2402.16757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-environmental-preference-based-speech-enhancement-for-individualised-multi-modal-hearing-aids-2402.16757"/></url>
<url><loc>https://scifaro.com/en/abs/the-icassp-2024-audio-deep-packet-loss-concealment-challenge-2402.16927</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-icassp-2024-audio-deep-packet-loss-concealment-challenge-2402.16927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-icassp-2024-audio-deep-packet-loss-concealment-challenge-2402.16927"/></url>
<url><loc>https://scifaro.com/en/abs/experimental-study-enhancing-voice-spoofing-detection-models-with-wav2vec-2-0-2402.17127</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experimental-study-enhancing-voice-spoofing-detection-models-with-wav2vec-2-0-2402.17127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experimental-study-enhancing-voice-spoofing-detection-models-with-wav2vec-2-0-2402.17127"/></url>
<url><loc>https://scifaro.com/en/abs/edtc-enhance-depth-of-text-comprehension-in-automated-audio-captioning-2402.17259</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edtc-enhance-depth-of-text-comprehension-in-automated-audio-captioning-2402.17259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edtc-enhance-depth-of-text-comprehension-in-automated-audio-captioning-2402.17259"/></url>
<url><loc>https://scifaro.com/en/abs/automated-classification-of-phonetic-segments-in-child-speech-using-raw-ultrasound-imaging-2402.17482</loc><lastmod>2025-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-classification-of-phonetic-segments-in-child-speech-using-raw-ultrasound-imaging-2402.17482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-classification-of-phonetic-segments-in-child-speech-using-raw-ultrasound-imaging-2402.17482"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-voice-messages-emovome-database-emotion-recognition-in-spontaneous-voice-messages-2402.17496</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-voice-messages-emovome-database-emotion-recognition-in-spontaneous-voice-messages-2402.17496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-voice-messages-emovome-database-emotion-recognition-in-spontaneous-voice-messages-2402.17496"/></url>
<url><loc>https://scifaro.com/en/abs/songcomposer-a-large-language-model-for-lyric-and-melody-generation-in-song-composition-2402.17645</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songcomposer-a-large-language-model-for-lyric-and-melody-generation-in-song-composition-2402.17645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songcomposer-a-large-language-model-for-lyric-and-melody-generation-in-song-composition-2402.17645"/></url>
<url><loc>https://scifaro.com/en/abs/bytecomposer-a-human-like-melody-composition-method-based-on-language-model-agent-2402.17785</loc><lastmod>2024-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bytecomposer-a-human-like-melody-composition-method-based-on-language-model-agent-2402.17785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bytecomposer-a-human-like-melody-composition-method-based-on-language-model-agent-2402.17785"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-ai-assisted-tagging-of-deepfake-audio-calls-using-challenge-response-2402.18085</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-ai-assisted-tagging-of-deepfake-audio-calls-using-challenge-response-2402.18085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-ai-assisted-tagging-of-deepfake-audio-calls-using-challenge-response-2402.18085"/></url>
<url><loc>https://scifaro.com/en/abs/convdtw-acs-audio-segmentation-for-track-type-detection-during-car-manufacturing-2402.18204</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convdtw-acs-audio-segmentation-for-track-type-detection-during-car-manufacturing-2402.18204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convdtw-acs-audio-segmentation-for-track-type-detection-during-car-manufacturing-2402.18204"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-adapter-for-noise-robust-automatic-speech-recognition-2402.18275</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-adapter-for-noise-robust-automatic-speech-recognition-2402.18275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-adapter-for-noise-robust-automatic-speech-recognition-2402.18275"/></url>
<url><loc>https://scifaro.com/en/abs/do-end-to-end-neural-diarization-attractors-need-to-encode-speaker-characteristic-information-2402.19325</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-end-to-end-neural-diarization-attractors-need-to-encode-speaker-characteristic-information-2402.19325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-end-to-end-neural-diarization-attractors-need-to-encode-speaker-characteristic-information-2402.19325"/></url>
<url><loc>https://scifaro.com/en/abs/unraveling-adversarial-examples-against-speaker-identification-techniques-for-attack-detection-and-victim-model-classification-2402.19355</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unraveling-adversarial-examples-against-speaker-identification-techniques-for-attack-detection-and-victim-model-classification-2402.19355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unraveling-adversarial-examples-against-speaker-identification-techniques-for-attack-detection-and-victim-model-classification-2402.19355"/></url>
<url><loc>https://scifaro.com/en/abs/probing-the-information-encoded-in-neural-based-acoustic-models-of-automatic-speech-recognition-systems-2402.19443</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-the-information-encoded-in-neural-based-acoustic-models-of-automatic-speech-recognition-systems-2402.19443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-the-information-encoded-in-neural-based-acoustic-models-of-automatic-speech-recognition-systems-2402.19443"/></url>
<url><loc>https://scifaro.com/en/abs/voxgenesis-unsupervised-discovery-of-latent-speaker-manifold-for-speech-synthesis-2403.00529</loc><lastmod>2024-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxgenesis-unsupervised-discovery-of-latent-speaker-manifold-for-speech-synthesis-2403.00529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxgenesis-unsupervised-discovery-of-latent-speaker-manifold-for-speech-synthesis-2403.00529"/></url>
<url><loc>https://scifaro.com/en/abs/structuring-concept-space-with-the-musical-circle-of-fifths-by-utilizing-music-grammar-based-activations-2403.00790</loc><lastmod>2026-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structuring-concept-space-with-the-musical-circle-of-fifths-by-utilizing-music-grammar-based-activations-2403.00790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structuring-concept-space-with-the-musical-circle-of-fifths-by-utilizing-music-grammar-based-activations-2403.00790"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-up-adaptive-filter-optimizers-2403.00977</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-up-adaptive-filter-optimizers-2403.00977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-up-adaptive-filter-optimizers-2403.00977"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speech-recognition-using-advanced-deep-learning-approaches-a-survey-2403.01255</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speech-recognition-using-advanced-deep-learning-approaches-a-survey-2403.01255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speech-recognition-using-advanced-deep-learning-approaches-a-survey-2403.01255"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audio-generation-diversity-with-visual-information-2403.01278</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audio-generation-diversity-with-visual-information-2403.01278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audio-generation-diversity-with-visual-information-2403.01278"/></url>
<url><loc>https://scifaro.com/en/abs/robust-wake-word-spotting-with-frame-level-cross-modal-attention-based-audio-visual-conformer-2403.01700</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-wake-word-spotting-with-frame-level-cross-modal-attention-based-audio-visual-conformer-2403.01700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-wake-word-spotting-with-frame-level-cross-modal-attention-based-audio-visual-conformer-2403.01700"/></url>
<url><loc>https://scifaro.com/en/abs/what-do-neural-networks-listen-to-exploring-the-crucial-bands-in-speech-enhancement-using-sinc-convolution-2403.01785</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-do-neural-networks-listen-to-exploring-the-crucial-bands-in-speech-enhancement-using-sinc-convolution-2403.01785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-do-neural-networks-listen-to-exploring-the-crucial-bands-in-speech-enhancement-using-sinc-convolution-2403.01785"/></url>
<url><loc>https://scifaro.com/en/abs/consep-a-noise-and-reverberation-robust-speech-separation-framework-by-magnitude-conditioning-2403.01792</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consep-a-noise-and-reverberation-robust-speech-separation-framework-by-magnitude-conditioning-2403.01792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consep-a-noise-and-reverberation-robust-speech-separation-framework-by-magnitude-conditioning-2403.01792"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-audio-deepfake-detection-system-via-multi-view-feature-2403.01960</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-audio-deepfake-detection-system-via-multi-view-feature-2403.01960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-audio-deepfake-detection-system-via-multi-view-feature-2403.01960"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-quantitative-emotion-editing-for-speech-generation-2403.02002</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-quantitative-emotion-editing-for-speech-generation-2403.02002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-quantitative-emotion-editing-for-speech-generation-2403.02002"/></url>
<url><loc>https://scifaro.com/en/abs/sa-sot-speaker-aware-serialized-output-training-for-multi-talker-asr-2403.02010</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-sot-speaker-aware-serialized-output-training-for-multi-talker-asr-2403.02010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-sot-speaker-aware-serialized-output-training-for-multi-talker-asr-2403.02010"/></url>
<url><loc>https://scifaro.com/en/abs/fighting-game-adaptive-background-music-for-improved-gameplay-2403.02701</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fighting-game-adaptive-background-music-for-improved-gameplay-2403.02701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fighting-game-adaptive-background-music-for-improved-gameplay-2403.02701"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-melody-generation-system-for-enhancing-the-creativity-of-musicians-2403.03395</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-melody-generation-system-for-enhancing-the-creativity-of-musicians-2403.03395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-melody-generation-system-for-enhancing-the-creativity-of-musicians-2403.03395"/></url>
<url><loc>https://scifaro.com/en/abs/crossnet-leveraging-global-cross-band-narrow-band-and-positional-encoding-for-single-and-multi-channel-speaker-separation-2403.03411</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossnet-leveraging-global-cross-band-narrow-band-and-positional-encoding-for-single-and-multi-channel-speaker-separation-2403.03411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossnet-leveraging-global-cross-band-narrow-band-and-positional-encoding-for-single-and-multi-channel-speaker-separation-2403.03411"/></url>
<url><loc>https://scifaro.com/en/abs/metamat-01-a-semi-analytic-solution-for-benchmarking-wave-propagation-simulations-of-homogeneous-absorbers-in-1d-3d-and-2d-2403.03510</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metamat-01-a-semi-analytic-solution-for-benchmarking-wave-propagation-simulations-of-homogeneous-absorbers-in-1d-3d-and-2d-2403.03510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metamat-01-a-semi-analytic-solution-for-benchmarking-wave-propagation-simulations-of-homogeneous-absorbers-in-1d-3d-and-2d-2403.03510"/></url>
<url><loc>https://scifaro.com/en/abs/non-verbal-information-in-spontaneous-speech-towards-a-new-framework-of-analysis-2403.03522</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-verbal-information-in-spontaneous-speech-towards-a-new-framework-of-analysis-2403.03522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-verbal-information-in-spontaneous-speech-towards-a-new-framework-of-analysis-2403.03522"/></url>
<url><loc>https://scifaro.com/en/abs/radia-radio-advertisement-detection-with-intelligent-analytics-2403.03538</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radia-radio-advertisement-detection-with-intelligent-analytics-2403.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radia-radio-advertisement-detection-with-intelligent-analytics-2403.03538"/></url>
<url><loc>https://scifaro.com/en/abs/can-audio-reveal-music-performance-difficulty-insights-from-the-piano-syllabus-dataset-2403.03947</loc><lastmod>2025-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-audio-reveal-music-performance-difficulty-insights-from-the-piano-syllabus-dataset-2403.03947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-audio-reveal-music-performance-difficulty-insights-from-the-piano-syllabus-dataset-2403.03947"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-attention-aggregation-for-language-agnostic-speaker-replication-2403.04111</loc><lastmod>2024-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-attention-aggregation-for-language-agnostic-speaker-replication-2403.04111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-attention-aggregation-for-language-agnostic-speaker-replication-2403.04111"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-dropout-induced-modality-bias-on-robustness-to-missing-video-frames-for-audio-visual-speech-recognition-2403.04245</loc><lastmod>2024-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-dropout-induced-modality-bias-on-robustness-to-missing-video-frames-for-audio-visual-speech-recognition-2403.04245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-dropout-induced-modality-bias-on-robustness-to-missing-video-frames-for-audio-visual-speech-recognition-2403.04245"/></url>
<url><loc>https://scifaro.com/en/abs/a-detailed-audio-text-data-simulation-pipeline-using-single-event-sounds-2403.04594</loc><lastmod>2024-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-detailed-audio-text-data-simulation-pipeline-using-single-event-sounds-2403.04594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-detailed-audio-text-data-simulation-pipeline-using-single-event-sounds-2403.04594"/></url>
<url><loc>https://scifaro.com/en/abs/rfwave-multi-band-rectified-flow-for-audio-waveform-reconstruction-2403.05010</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rfwave-multi-band-rectified-flow-for-audio-waveform-reconstruction-2403.05010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rfwave-multi-band-rectified-flow-for-audio-waveform-reconstruction-2403.05010"/></url>
<url><loc>https://scifaro.com/en/abs/spectrogram-based-detection-of-auto-tuned-vocals-in-music-recordings-2403.05380</loc><lastmod>2024-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrogram-based-detection-of-auto-tuned-vocals-in-music-recordings-2403.05380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrogram-based-detection-of-auto-tuned-vocals-in-music-recordings-2403.05380"/></url>
<url><loc>https://scifaro.com/en/abs/svad-a-robust-low-power-and-light-weight-voice-activity-detection-with-spiking-neural-networks-2403.05772</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svad-a-robust-low-power-and-light-weight-voice-activity-detection-with-spiking-neural-networks-2403.05772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svad-a-robust-low-power-and-light-weight-voice-activity-detection-with-spiking-neural-networks-2403.05772"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-textual-diffusion-model-for-converting-speech-signals-into-ultrasound-tongue-imaging-data-2403.05820</loc><lastmod>2024-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-textual-diffusion-model-for-converting-speech-signals-into-ultrasound-tongue-imaging-data-2403.05820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-textual-diffusion-model-for-converting-speech-signals-into-ultrasound-tongue-imaging-data-2403.05820"/></url>
<url><loc>https://scifaro.com/en/abs/ham-tts-hierarchical-acoustic-modeling-for-token-based-zero-shot-text-to-speech-with-model-and-data-scaling-2403.05989</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ham-tts-hierarchical-acoustic-modeling-for-token-based-zero-shot-text-to-speech-with-model-and-data-scaling-2403.05989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ham-tts-hierarchical-acoustic-modeling-for-token-based-zero-shot-text-to-speech-with-model-and-data-scaling-2403.05989"/></url>
<url><loc>https://scifaro.com/en/abs/towards-decoupling-frontend-enhancement-and-backend-recognition-in-monaural-robust-asr-2403.06387</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-decoupling-frontend-enhancement-and-backend-recognition-in-monaural-robust-asr-2403.06387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-decoupling-frontend-enhancement-and-backend-recognition-in-monaural-robust-asr-2403.06387"/></url>
<url><loc>https://scifaro.com/en/abs/cosine-scoring-with-uncertainty-for-neural-speaker-embedding-2403.06404</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosine-scoring-with-uncertainty-for-neural-speaker-embedding-2403.06404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosine-scoring-with-uncertainty-for-neural-speaker-embedding-2403.06404"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-long-term-streaming-neural-speech-enhancement-for-static-and-moving-speakers-2403.07675</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-long-term-streaming-neural-speech-enhancement-for-static-and-moving-speakers-2403.07675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-long-term-streaming-neural-speech-enhancement-for-static-and-moving-speakers-2403.07675"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-keyword-spotting-through-on-device-learnable-user-speech-characteristics-2403.07802</loc><lastmod>2024-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-keyword-spotting-through-on-device-learnable-user-speech-characteristics-2403.07802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-keyword-spotting-through-on-device-learnable-user-speech-characteristics-2403.07802"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-audio-generation-synchronized-with-videos-2403.07938</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-audio-generation-synchronized-with-videos-2403.07938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-audio-generation-synchronized-with-videos-2403.07938"/></url>
<url><loc>https://scifaro.com/en/abs/motifs-phrases-and-beyond-the-modelling-of-structure-in-symbolic-music-generation-2403.07995</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/motifs-phrases-and-beyond-the-modelling-of-structure-in-symbolic-music-generation-2403.07995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/motifs-phrases-and-beyond-the-modelling-of-structure-in-symbolic-music-generation-2403.07995"/></url>
<url><loc>https://scifaro.com/en/abs/em-tts-efficiently-trained-low-resource-mongolian-lightweight-text-to-speech-2403.08164</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/em-tts-efficiently-trained-low-resource-mongolian-lightweight-text-to-speech-2403.08164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/em-tts-efficiently-trained-low-resource-mongolian-lightweight-text-to-speech-2403.08164"/></url>
<url><loc>https://scifaro.com/en/abs/from-weak-to-strong-sound-event-labels-using-adaptive-change-point-detection-and-active-learning-2403.08525</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-weak-to-strong-sound-event-labels-using-adaptive-change-point-detection-and-active-learning-2403.08525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-weak-to-strong-sound-event-labels-using-adaptive-change-point-detection-and-active-learning-2403.08525"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-amp-modeling-from-data-to-controllable-guitar-amplifier-models-2403.08559</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-amp-modeling-from-data-to-controllable-guitar-amplifier-models-2403.08559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-amp-modeling-from-data-to-controllable-guitar-amplifier-models-2403.08559"/></url>
<url><loc>https://scifaro.com/en/abs/an-ai-driven-approach-to-wind-turbine-bearing-fault-diagnosis-from-acoustic-signals-2403.09030</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ai-driven-approach-to-wind-turbine-bearing-fault-diagnosis-from-acoustic-signals-2403.09030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ai-driven-approach-to-wind-turbine-bearing-fault-diagnosis-from-acoustic-signals-2403.09030"/></url>
<url><loc>https://scifaro.com/en/abs/more-than-words-advancements-and-challenges-in-speech-recognition-for-singing-2403.09298</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/more-than-words-advancements-and-challenges-in-speech-recognition-for-singing-2403.09298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/more-than-words-advancements-and-challenges-in-speech-recognition-for-singing-2403.09298"/></url>
<url><loc>https://scifaro.com/en/abs/a-practical-guide-to-spectrogram-analysis-for-audio-signal-processing-2403.09321</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-practical-guide-to-spectrogram-analysis-for-audio-signal-processing-2403.09321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-practical-guide-to-spectrogram-analysis-for-audio-signal-processing-2403.09321"/></url>
<url><loc>https://scifaro.com/en/abs/lm2d-lyrics-and-music-driven-dance-synthesis-2403.09407</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lm2d-lyrics-and-music-driven-dance-synthesis-2403.09407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lm2d-lyrics-and-music-driven-dance-synthesis-2403.09407"/></url>
<url><loc>https://scifaro.com/en/abs/the-neural-srp-method-for-positional-sound-source-localization-2403.09455</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-neural-srp-method-for-positional-sound-source-localization-2403.09455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-neural-srp-method-for-positional-sound-source-localization-2403.09455"/></url>
<url><loc>https://scifaro.com/en/abs/uamix-mae-efficient-tuning-of-pretrained-audio-transformers-with-unsupervised-audio-mixtures-2403.09579</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uamix-mae-efficient-tuning-of-pretrained-audio-transformers-with-unsupervised-audio-mixtures-2403.09579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uamix-mae-efficient-tuning-of-pretrained-audio-transformers-with-unsupervised-audio-mixtures-2403.09579"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-of-mixups-for-multi-label-classification-of-rare-anuran-sounds-2403.09598</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-of-mixups-for-multi-label-classification-of-rare-anuran-sounds-2403.09598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-of-mixups-for-multi-label-classification-of-rare-anuran-sounds-2403.09598"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-100-a-cross-lingual-benchmarking-dataset-for-the-classification-of-spoken-numbers-in-different-languages-2403.09753</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-100-a-cross-lingual-benchmarking-dataset-for-the-classification-of-spoken-numbers-in-different-languages-2403.09753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-100-a-cross-lingual-benchmarking-dataset-for-the-classification-of-spoken-numbers-in-different-languages-2403.09753"/></url>
<url><loc>https://scifaro.com/en/abs/mr-mt3-memory-retaining-multi-track-music-transcription-to-mitigate-instrument-leakage-2403.10024</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mr-mt3-memory-retaining-multi-track-music-transcription-to-mitigate-instrument-leakage-2403.10024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mr-mt3-memory-retaining-multi-track-music-transcription-to-mitigate-instrument-leakage-2403.10024"/></url>
<url><loc>https://scifaro.com/en/abs/multiscale-matching-driven-by-cross-modal-similarity-consistency-for-audio-text-retrieval-2403.10146</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiscale-matching-driven-by-cross-modal-similarity-consistency-for-audio-text-retrieval-2403.10146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiscale-matching-driven-by-cross-modal-similarity-consistency-for-audio-text-retrieval-2403.10146"/></url>
<url><loc>https://scifaro.com/en/abs/birdset-a-large-scale-dataset-for-audio-classification-in-avian-bioacoustics-2403.10380</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/birdset-a-large-scale-dataset-for-audio-classification-in-avian-bioacoustics-2403.10380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/birdset-a-large-scale-dataset-for-audio-classification-in-avian-bioacoustics-2403.10380"/></url>
<url><loc>https://scifaro.com/en/abs/musichifi-fast-high-fidelity-stereo-vocoding-2403.10493</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musichifi-fast-high-fidelity-stereo-vocoding-2403.10493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musichifi-fast-high-fidelity-stereo-vocoding-2403.10493"/></url>
<url><loc>https://scifaro.com/en/abs/on-device-domain-learning-for-keyword-spotting-on-low-power-extreme-edge-embedded-systems-2403.10549</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-device-domain-learning-for-keyword-spotting-on-low-power-extreme-edge-embedded-systems-2403.10549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-device-domain-learning-for-keyword-spotting-on-low-power-extreme-edge-embedded-systems-2403.10549"/></url>
<url><loc>https://scifaro.com/en/abs/coplay-audio-agnostic-cognitive-scaling-for-acoustic-sensing-2403.10796</loc><lastmod>2025-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coplay-audio-agnostic-cognitive-scaling-for-acoustic-sensing-2403.10796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coplay-audio-agnostic-cognitive-scaling-for-acoustic-sensing-2403.10796"/></url>
<url><loc>https://scifaro.com/en/abs/speech-driven-personalized-gesture-synthetics-harnessing-automatic-fuzzy-feature-inference-2403.10805</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-driven-personalized-gesture-synthetics-harnessing-automatic-fuzzy-feature-inference-2403.10805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-driven-personalized-gesture-synthetics-harnessing-automatic-fuzzy-feature-inference-2403.10805"/></url>
<url><loc>https://scifaro.com/en/abs/urban-sound-propagation-a-benchmark-for-1-step-generative-modeling-of-complex-physical-systems-2403.10904</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/urban-sound-propagation-a-benchmark-for-1-step-generative-modeling-of-complex-physical-systems-2403.10904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/urban-sound-propagation-a-benchmark-for-1-step-generative-modeling-of-complex-physical-systems-2403.10904"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-frame-level-learning-for-few-shot-sound-event-detection-2403.11091</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-frame-level-learning-for-few-shot-sound-event-detection-2403.11091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-frame-level-learning-for-few-shot-sound-event-detection-2403.11091"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-multi-source-inference-for-text-conditioned-music-diffusion-models-2403.11706</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-multi-source-inference-for-text-conditioned-music-diffusion-models-2403.11706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-multi-source-inference-for-text-conditioned-music-diffusion-models-2403.11706"/></url>
<url><loc>https://scifaro.com/en/abs/hallucination-in-perceptual-metric-driven-speech-enhancement-networks-2403.11732</loc><lastmod>2024-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hallucination-in-perceptual-metric-driven-speech-enhancement-networks-2403.11732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hallucination-in-perceptual-metric-driven-speech-enhancement-networks-2403.11732"/></url>
<url><loc>https://scifaro.com/en/abs/towards-the-development-of-a-real-time-deepfake-audio-detection-system-in-communication-platforms-2403.11778</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-the-development-of-a-real-time-deepfake-audio-detection-system-in-communication-platforms-2403.11778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-the-development-of-a-real-time-deepfake-audio-detection-system-in-communication-platforms-2403.11778"/></url>
<url><loc>https://scifaro.com/en/abs/prompt-singer-controllable-singing-voice-synthesis-with-natural-language-prompt-2403.11780</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompt-singer-controllable-singing-voice-synthesis-with-natural-language-prompt-2403.11780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompt-singer-controllable-singing-voice-synthesis-with-natural-language-prompt-2403.11780"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-and-localization-with-distance-estimation-2403.11827</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-and-localization-with-distance-estimation-2403.11827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-and-localization-with-distance-estimation-2403.11827"/></url>
<url><loc>https://scifaro.com/en/abs/unimodal-multi-task-fusion-for-emotional-mimicry-intensity-prediction-2403.11879</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unimodal-multi-task-fusion-for-emotional-mimicry-intensity-prediction-2403.11879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unimodal-multi-task-fusion-for-emotional-mimicry-intensity-prediction-2403.11879"/></url>
<url><loc>https://scifaro.com/en/abs/notochord-a-flexible-probabilistic-model-for-real-time-midi-performance-2403.12000</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/notochord-a-flexible-probabilistic-model-for-real-time-midi-performance-2403.12000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/notochord-a-flexible-probabilistic-model-for-real-time-midi-performance-2403.12000"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-extraction-using-spatially-regularized-independent-low-rank-matrix-analysis-and-rank-constrained-spatial-covariance-matrix-estimation-2403.12477</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-extraction-using-spatially-regularized-independent-low-rank-matrix-analysis-and-rank-constrained-spatial-covariance-matrix-estimation-2403.12477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-extraction-using-spatially-regularized-independent-low-rank-matrix-analysis-and-rank-constrained-spatial-covariance-matrix-estimation-2403.12477"/></url>
<url><loc>https://scifaro.com/en/abs/listenable-maps-for-audio-classifiers-2403.13086</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listenable-maps-for-audio-classifiers-2403.13086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listenable-maps-for-audio-classifiers-2403.13086"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-aware-convolution-for-sound-event-detection-2403.13252</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-aware-convolution-for-sound-event-detection-2403.13252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-aware-convolution-for-sound-event-detection-2403.13252"/></url>
<url><loc>https://scifaro.com/en/abs/onset-and-offset-weighted-loss-function-for-sound-event-detection-2403.13254</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/onset-and-offset-weighted-loss-function-for-sound-event-detection-2403.13254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/onset-and-offset-weighted-loss-function-for-sound-event-detection-2403.13254"/></url>
<url><loc>https://scifaro.com/en/abs/building-speech-corpus-with-diverse-voice-characteristics-for-its-prompt-based-representation-2403.13353</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-speech-corpus-with-diverse-voice-characteristics-for-its-prompt-based-representation-2403.13353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-speech-corpus-with-diverse-voice-characteristics-for-its-prompt-based-representation-2403.13353"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-long-content-speech-recognition-with-factorized-neural-transducer-2403.13423</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-long-content-speech-recognition-with-factorized-neural-transducer-2403.13423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-long-content-speech-recognition-with-factorized-neural-transducer-2403.13423"/></url>
<url><loc>https://scifaro.com/en/abs/utduss-utokyo-sarulab-system-for-interspeech2024-speech-processing-using-discrete-speech-unit-challenge-2403.13720</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utduss-utokyo-sarulab-system-for-interspeech2024-speech-processing-using-discrete-speech-unit-challenge-2403.13720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utduss-utokyo-sarulab-system-for-interspeech2024-speech-processing-using-discrete-speech-unit-challenge-2403.13720"/></url>
<url><loc>https://scifaro.com/en/abs/the-neurips-2023-machine-learning-for-audio-workshop-affective-audio-benchmarks-and-novel-data-2403.14048</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-neurips-2023-machine-learning-for-audio-workshop-affective-audio-benchmarks-and-novel-data-2403.14048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-neurips-2023-machine-learning-for-audio-workshop-affective-audio-benchmarks-and-novel-data-2403.14048"/></url>
<url><loc>https://scifaro.com/en/abs/emodarts-joint-optimisation-of-cnn-sequential-neural-network-architectures-for-superior-speech-emotion-recognition-2403.14083</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emodarts-joint-optimisation-of-cnn-sequential-neural-network-architectures-for-superior-speech-emotion-recognition-2403.14083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emodarts-joint-optimisation-of-cnn-sequential-neural-network-architectures-for-superior-speech-emotion-recognition-2403.14083"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-the-robustness-of-spectral-clustering-for-deep-speaker-diarization-2403.14286</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-the-robustness-of-spectral-clustering-for-deep-speaker-diarization-2403.14286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-the-robustness-of-spectral-clustering-for-deep-speaker-diarization-2403.14286"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-green-ai-for-audio-deepfake-detection-2403.14290</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-green-ai-for-audio-deepfake-detection-2403.14290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-green-ai-for-audio-deepfake-detection-2403.14290"/></url>
<url><loc>https://scifaro.com/en/abs/xlavs-r-cross-lingual-audio-visual-speech-representation-learning-for-noise-robust-speech-perception-2403.14402</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xlavs-r-cross-lingual-audio-visual-speech-representation-learning-for-noise-robust-speech-perception-2403.14402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xlavs-r-cross-lingual-audio-visual-speech-representation-learning-for-noise-robust-speech-perception-2403.14402"/></url>
<url><loc>https://scifaro.com/en/abs/music-to-dance-as-language-translation-using-sequence-models-2403.15569</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-to-dance-as-language-translation-using-sequence-models-2403.15569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-to-dance-as-language-translation-using-sequence-models-2403.15569"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-av-hubert-and-mask-and-recover-strategy-2403.16078</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-av-hubert-and-mask-and-recover-strategy-2403.16078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-av-hubert-and-mask-and-recover-strategy-2403.16078"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-analog-dynamic-range-compressors-using-deep-learning-and-state-space-models-2403.16331</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-analog-dynamic-range-compressors-using-deep-learning-and-state-space-models-2403.16331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-analog-dynamic-range-compressors-using-deep-learning-and-state-space-models-2403.16331"/></url>
<url><loc>https://scifaro.com/en/abs/training-generative-adversarial-network-based-vocoder-with-limited-data-using-augmentation-conditional-discriminator-2403.16464</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-generative-adversarial-network-based-vocoder-with-limited-data-using-augmentation-conditional-discriminator-2403.16464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-generative-adversarial-network-based-vocoder-with-limited-data-using-augmentation-conditional-discriminator-2403.16464"/></url>
<url><loc>https://scifaro.com/en/abs/accuracy-enhancement-method-for-speech-emotion-recognition-from-spectrogram-using-temporal-frequency-correlation-and-positional-information-learning-through-knowledge-transfer-2403.17327</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accuracy-enhancement-method-for-speech-emotion-recognition-from-spectrogram-using-temporal-frequency-correlation-and-positional-information-learning-through-knowledge-transfer-2403.17327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accuracy-enhancement-method-for-speech-emotion-recognition-from-spectrogram-using-temporal-frequency-correlation-and-positional-information-learning-through-knowledge-transfer-2403.17327"/></url>
<url><loc>https://scifaro.com/en/abs/theoretical-analysis-of-quality-of-conventional-beamforming-for-phased-microphone-arrays-2403.17376</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theoretical-analysis-of-quality-of-conventional-beamforming-for-phased-microphone-arrays-2403.17376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theoretical-analysis-of-quality-of-conventional-beamforming-for-phased-microphone-arrays-2403.17376"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-for-speech-generation-tasks-2403.17378</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-for-speech-generation-tasks-2403.17378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-neural-speech-phase-prediction-based-on-parallel-estimation-architecture-and-anti-wrapping-losses-for-speech-generation-tasks-2403.17378"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-and-applying-audio-based-sentiment-analysis-in-music-2403.17379</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-and-applying-audio-based-sentiment-analysis-in-music-2403.17379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-and-applying-audio-based-sentiment-analysis-in-music-2403.17379"/></url>
<url><loc>https://scifaro.com/en/abs/correlation-of-fr-echet-audio-distance-with-human-perception-of-environmental-audio-is-embedding-dependant-2403.17508</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correlation-of-fr-echet-audio-distance-with-human-perception-of-environmental-audio-is-embedding-dependant-2403.17508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correlation-of-fr-echet-audio-distance-with-human-perception-of-environmental-audio-is-embedding-dependant-2403.17508"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-deepfake-environmental-audio-2403.17529</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-deepfake-environmental-audio-2403.17529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-deepfake-environmental-audio-2403.17529"/></url>
<url><loc>https://scifaro.com/en/abs/deep-functional-multiple-index-models-with-an-application-to-ser-2403.17562</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-functional-multiple-index-models-with-an-application-to-ser-2403.17562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-functional-multiple-index-models-with-an-application-to-ser-2403.17562"/></url>
<url><loc>https://scifaro.com/en/abs/aces-evaluating-automated-audio-captioning-models-on-the-semantics-of-sounds-2403.18572</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aces-evaluating-automated-audio-captioning-models-on-the-semantics-of-sounds-2403.18572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aces-evaluating-automated-audio-captioning-models-on-the-semantics-of-sounds-2403.18572"/></url>
<url><loc>https://scifaro.com/en/abs/real-acoustic-fields-an-audio-visual-room-acoustics-dataset-and-benchmark-2403.18821</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-acoustic-fields-an-audio-visual-room-acoustics-dataset-and-benchmark-2403.18821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-acoustic-fields-an-audio-visual-room-acoustics-dataset-and-benchmark-2403.18821"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-neural-transducer-for-fine-grained-speech-emotion-recognition-2403.19224</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-neural-transducer-for-fine-grained-speech-emotion-recognition-2403.19224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-neural-transducer-for-fine-grained-speech-emotion-recognition-2403.19224"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-stochastic-transformer-based-approach-for-post-traumatic-stress-disorder-detection-using-audio-recording-of-clinical-interviews-2403.19441</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-stochastic-transformer-based-approach-for-post-traumatic-stress-disorder-detection-using-audio-recording-of-clinical-interviews-2403.19441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-stochastic-transformer-based-approach-for-post-traumatic-stress-disorder-detection-using-audio-recording-of-clinical-interviews-2403.19441"/></url>
<url><loc>https://scifaro.com/en/abs/asymmetric-and-trial-dependent-modeling-the-contribution-of-lia-to-sdsv-challenge-task-2-2403.19634</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asymmetric-and-trial-dependent-modeling-the-contribution-of-lia-to-sdsv-challenge-task-2-2403.19634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asymmetric-and-trial-dependent-modeling-the-contribution-of-lia-to-sdsv-challenge-task-2-2403.19634"/></url>
<url><loc>https://scifaro.com/en/abs/creating-aesthetic-sonifications-on-the-web-with-siren-2403.19763</loc><lastmod>2024-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-aesthetic-sonifications-on-the-web-with-siren-2403.19763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-aesthetic-sonifications-on-the-web-with-siren-2403.19763"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-classification-using-wasn-in-outdoor-environment-2403.20130</loc><lastmod>2026-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-classification-using-wasn-in-outdoor-environment-2403.20130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-classification-using-wasn-in-outdoor-environment-2403.20130"/></url>
<url><loc>https://scifaro.com/en/abs/voice-signal-processing-for-machine-learning-the-case-of-speaker-isolation-2403.20202</loc><lastmod>2024-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-signal-processing-for-machine-learning-the-case-of-speaker-isolation-2403.20202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-signal-processing-for-machine-learning-the-case-of-speaker-isolation-2403.20202"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-short-segment-pediatric-heart-sounds-based-on-a-transformer-based-convolutional-neural-network-2404.00470</loc><lastmod>2026-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-short-segment-pediatric-heart-sounds-based-on-a-transformer-based-convolutional-neural-network-2404.00470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-short-segment-pediatric-heart-sounds-based-on-a-transformer-based-convolutional-neural-network-2404.00470"/></url>
<url><loc>https://scifaro.com/en/abs/cm-tts-enhancing-real-time-text-to-speech-synthesis-efficiency-through-weighted-samplers-and-consistency-models-2404.00569</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cm-tts-enhancing-real-time-text-to-speech-synthesis-efficiency-through-weighted-samplers-and-consistency-models-2404.00569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cm-tts-enhancing-real-time-text-to-speech-synthesis-efficiency-through-weighted-samplers-and-consistency-models-2404.00569"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-audio-prompt-adherence-with-distribution-based-embedding-distances-2404.00775</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-audio-prompt-adherence-with-distribution-based-embedding-distances-2404.00775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-audio-prompt-adherence-with-distribution-based-embedding-distances-2404.00775"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-analysis-of-poetry-reading-audio-singing-narrating-or-somewhere-in-between-2404.00789</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-analysis-of-poetry-reading-audio-singing-narrating-or-somewhere-in-between-2404.00789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-analysis-of-poetry-reading-audio-singing-narrating-or-somewhere-in-between-2404.00789"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-neural-speech-codec-2404.00791</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-neural-speech-codec-2404.00791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-neural-speech-codec-2404.00791"/></url>
<url><loc>https://scifaro.com/en/abs/removing-speaker-information-from-speech-representation-using-variable-length-soft-pooling-2404.00856</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/removing-speaker-information-from-speech-representation-using-variable-length-soft-pooling-2404.00856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/removing-speaker-information-from-speech-representation-using-variable-length-soft-pooling-2404.00856"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-audio-representation-for-music-genre-identification-in-mir-2404.01058</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-audio-representation-for-music-genre-identification-in-mir-2404.01058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-audio-representation-for-music-genre-identification-in-mir-2404.01058"/></url>
<url><loc>https://scifaro.com/en/abs/voice-ehr-introducing-multimodal-audio-data-for-health-2404.01620</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-ehr-introducing-multimodal-audio-data-for-health-2404.01620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-ehr-introducing-multimodal-audio-data-for-health-2404.01620"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-audio-separation-via-bi-modal-semantic-similarity-2404.01740</loc><lastmod>2024-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-audio-separation-via-bi-modal-semantic-similarity-2404.01740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-audio-separation-via-bi-modal-semantic-similarity-2404.01740"/></url>
<url><loc>https://scifaro.com/en/abs/spmamba-state-space-model-is-all-you-need-in-speech-separation-2404.02063</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spmamba-state-space-model-is-all-you-need-in-speech-separation-2404.02063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spmamba-state-space-model-is-all-you-need-in-speech-separation-2404.02063"/></url>
<url><loc>https://scifaro.com/en/abs/smitin-self-monitored-inference-time-intervention-for-generative-music-transformers-2404.02252</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smitin-self-monitored-inference-time-intervention-for-generative-music-transformers-2404.02252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smitin-self-monitored-inference-time-intervention-for-generative-music-transformers-2404.02252"/></url>
<url><loc>https://scifaro.com/en/abs/pscodec-a-series-of-high-fidelity-low-bitrate-neural-speech-codecs-leveraging-prompt-encoders-2404.02702</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pscodec-a-series-of-high-fidelity-low-bitrate-neural-speech-codecs-leveraging-prompt-encoders-2404.02702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pscodec-a-series-of-high-fidelity-low-bitrate-neural-speech-codecs-leveraging-prompt-encoders-2404.02702"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-musical-characteristics-of-national-anthems-in-relation-to-global-indices-2404.03606</loc><lastmod>2024-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-musical-characteristics-of-national-anthems-in-relation-to-global-indices-2404.03606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-musical-characteristics-of-national-anthems-in-relation-to-global-indices-2404.03606"/></url>
<url><loc>https://scifaro.com/en/abs/holon-a-cybernetic-interface-for-bio-semiotics-2404.03894</loc><lastmod>2024-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/holon-a-cybernetic-interface-for-bio-semiotics-2404.03894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/holon-a-cybernetic-interface-for-bio-semiotics-2404.03894"/></url>
<url><loc>https://scifaro.com/en/abs/it-is-okay-to-be-uncommon-quantizing-sound-event-detection-networks-on-hardware-accelerators-with-uncommon-sub-byte-support-2404.04386</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/it-is-okay-to-be-uncommon-quantizing-sound-event-detection-networks-on-hardware-accelerators-with-uncommon-sub-byte-support-2404.04386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/it-is-okay-to-be-uncommon-quantizing-sound-event-detection-networks-on-hardware-accelerators-with-uncommon-sub-byte-support-2404.04386"/></url>
<url><loc>https://scifaro.com/en/abs/the-nes-video-music-database-a-dataset-of-symbolic-video-game-music-paired-with-gameplay-videos-2404.04420</loc><lastmod>2024-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nes-video-music-database-a-dataset-of-symbolic-video-game-music-paired-with-gameplay-videos-2404.04420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nes-video-music-database-a-dataset-of-symbolic-video-game-music-paired-with-gameplay-videos-2404.04420"/></url>
<url><loc>https://scifaro.com/en/abs/mathematics-of-the-mml-functional-quantizer-modules-for-vcv-rack-software-synthesizer-2404.04739</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mathematics-of-the-mml-functional-quantizer-modules-for-vcv-rack-software-synthesizer-2404.04739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mathematics-of-the-mml-functional-quantizer-modules-for-vcv-rack-software-synthesizer-2404.04739"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-audio-deepfake-detection-dataset-and-analysis-2404.04904</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-audio-deepfake-detection-dataset-and-analysis-2404.04904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-audio-deepfake-detection-dataset-and-analysis-2404.04904"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-bi-lstm-and-transformer-architecture-for-generating-tabla-music-2404.05765</loc><lastmod>2024-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-bi-lstm-and-transformer-architecture-for-generating-tabla-music-2404.05765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-bi-lstm-and-transformer-architecture-for-generating-tabla-music-2404.05765"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-diverse-sounds-identifying-outliers-in-a-music-corpus-2404.06103</loc><lastmod>2024-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-diverse-sounds-identifying-outliers-in-a-music-corpus-2404.06103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-diverse-sounds-identifying-outliers-in-a-music-corpus-2404.06103"/></url>
<url><loc>https://scifaro.com/en/abs/mupt-a-generative-symbolic-music-pretrained-transformer-2404.06393</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mupt-a-generative-symbolic-music-pretrained-transformer-2404.06393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mupt-a-generative-symbolic-music-pretrained-transformer-2404.06393"/></url>
<url><loc>https://scifaro.com/en/abs/voiceshop-a-unified-speech-to-speech-framework-for-identity-preserving-zero-shot-voice-editing-2404.06674</loc><lastmod>2024-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceshop-a-unified-speech-to-speech-framework-for-identity-preserving-zero-shot-voice-editing-2404.06674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceshop-a-unified-speech-to-speech-framework-for-identity-preserving-zero-shot-voice-editing-2404.06674"/></url>
<url><loc>https://scifaro.com/en/abs/learning-multidimensional-disentangled-representations-of-instrumental-sounds-for-musical-similarity-assessment-2404.06682</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-multidimensional-disentangled-representations-of-instrumental-sounds-for-musical-similarity-assessment-2404.06682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-multidimensional-disentangled-representations-of-instrumental-sounds-for-musical-similarity-assessment-2404.06682"/></url>
<url><loc>https://scifaro.com/en/abs/an-effective-automated-speaking-assessment-approach-to-mitigating-data-scarcity-and-imbalanced-distribution-2404.07575</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-effective-automated-speaking-assessment-approach-to-mitigating-data-scarcity-and-imbalanced-distribution-2404.07575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-effective-automated-speaking-assessment-approach-to-mitigating-data-scarcity-and-imbalanced-distribution-2404.07575"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-dual-stage-framework-for-personalized-speech-enhancement-based-on-deepfilternet2-2404.08022</loc><lastmod>2024-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-dual-stage-framework-for-personalized-speech-enhancement-based-on-deepfilternet2-2404.08022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-dual-stage-framework-for-personalized-speech-enhancement-based-on-deepfilternet2-2404.08022"/></url>
<url><loc>https://scifaro.com/en/abs/voice-attribute-editing-with-text-prompt-2404.08857</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-attribute-editing-with-text-prompt-2404.08857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-attribute-editing-with-text-prompt-2404.08857"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-comparison-of-multi-view-self-supervised-methods-for-music-tagging-2404.09177</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-comparison-of-multi-view-self-supervised-methods-for-music-tagging-2404.09177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-comparison-of-multi-view-self-supervised-methods-for-music-tagging-2404.09177"/></url>
<url><loc>https://scifaro.com/en/abs/prior-agnostic-multi-scale-contrastive-text-audio-pre-training-for-parallelized-tts-frontend-modeling-2404.09192</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prior-agnostic-multi-scale-contrastive-text-audio-pre-training-for-parallelized-tts-frontend-modeling-2404.09192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prior-agnostic-multi-scale-contrastive-text-audio-pre-training-for-parallelized-tts-frontend-modeling-2404.09192"/></url>
<url><loc>https://scifaro.com/en/abs/scoring-time-intervals-using-non-hierarchical-transformer-for-automatic-piano-transcription-2404.09466</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scoring-time-intervals-using-non-hierarchical-transformer-for-automatic-piano-transcription-2404.09466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scoring-time-intervals-using-non-hierarchical-transformer-for-automatic-piano-transcription-2404.09466"/></url>
<url><loc>https://scifaro.com/en/abs/tango-2-aligning-diffusion-based-text-to-audio-generations-through-direct-preference-optimization-2404.09956</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tango-2-aligning-diffusion-based-text-to-audio-generations-through-direct-preference-optimization-2404.09956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tango-2-aligning-diffusion-based-text-to-audio-generations-through-direct-preference-optimization-2404.09956"/></url>
<url><loc>https://scifaro.com/en/abs/long-form-music-generation-with-latent-diffusion-2404.10301</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-form-music-generation-with-latent-diffusion-2404.10301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-form-music-generation-with-latent-diffusion-2404.10301"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-mobile-target-detection-and-tracking-in-active-sonar-array-using-a-track-before-detect-approach-2404.10316</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-mobile-target-detection-and-tracking-in-active-sonar-array-using-a-track-before-detect-approach-2404.10316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-mobile-target-detection-and-tracking-in-active-sonar-array-using-a-track-before-detect-approach-2404.10316"/></url>
<url><loc>https://scifaro.com/en/abs/vivo-une-approche-multimodale-de-la-synthese-concatenative-par-corpus-dans-le-cadre-d-une-oeuvre-audiovisuelle-immersive-2404.10578</loc><lastmod>2024-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vivo-une-approche-multimodale-de-la-synthese-concatenative-par-corpus-dans-le-cadre-d-une-oeuvre-audiovisuelle-immersive-2404.10578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vivo-une-approche-multimodale-de-la-synthese-concatenative-par-corpus-dans-le-cadre-d-une-oeuvre-audiovisuelle-immersive-2404.10578"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speaker-diarization-in-distributed-iot-networks-using-federated-learning-2404.10842</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speaker-diarization-in-distributed-iot-networks-using-federated-learning-2404.10842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speaker-diarization-in-distributed-iot-networks-using-federated-learning-2404.10842"/></url>
<url><loc>https://scifaro.com/en/abs/music-enhancement-with-deep-filters-a-technical-report-for-the-icassp-2024-cadenza-challenge-2404.11116</loc><lastmod>2024-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-enhancement-with-deep-filters-a-technical-report-for-the-icassp-2024-cadenza-challenge-2404.11116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-enhancement-with-deep-filters-a-technical-report-for-the-icassp-2024-cadenza-challenge-2404.11116"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-recognizing-speech-and-singing-voices-based-on-multi-task-audio-source-separation-2404.11275</loc><lastmod>2024-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-recognizing-speech-and-singing-voices-based-on-multi-task-audio-source-separation-2404.11275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-recognizing-speech-and-singing-voices-based-on-multi-task-audio-source-separation-2404.11275"/></url>
<url><loc>https://scifaro.com/en/abs/large-language-models-from-notes-to-musical-form-2404.11976</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-language-models-from-notes-to-musical-form-2404.11976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-language-models-from-notes-to-musical-form-2404.11976"/></url>
<url><loc>https://scifaro.com/en/abs/midget-music-conditioned-3d-dance-generation-2404.12062</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midget-music-conditioned-3d-dance-generation-2404.12062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midget-music-conditioned-3d-dance-generation-2404.12062"/></url>
<url><loc>https://scifaro.com/en/abs/timit-speaker-profiling-a-comparison-of-multi-task-learning-and-single-task-learning-approaches-2404.12077</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timit-speaker-profiling-a-comparison-of-multi-task-learning-and-single-task-learning-approaches-2404.12077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timit-speaker-profiling-a-comparison-of-multi-task-learning-and-single-task-learning-approaches-2404.12077"/></url>
<url><loc>https://scifaro.com/en/abs/non-invasive-suicide-risk-prediction-through-speech-analysis-2404.12132</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-invasive-suicide-risk-prediction-through-speech-analysis-2404.12132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-invasive-suicide-risk-prediction-through-speech-analysis-2404.12132"/></url>
<url><loc>https://scifaro.com/en/abs/separate-in-the-speech-chain-cross-modal-conditional-audio-visual-target-speech-extraction-2404.12725</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-in-the-speech-chain-cross-modal-conditional-audio-visual-target-speech-extraction-2404.12725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-in-the-speech-chain-cross-modal-conditional-audio-visual-target-speech-extraction-2404.12725"/></url>
<url><loc>https://scifaro.com/en/abs/trnet-two-level-refinement-network-leveraging-speech-enhancement-for-noise-robust-speech-emotion-recognition-2404.12979</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trnet-two-level-refinement-network-leveraging-speech-enhancement-for-noise-robust-speech-emotion-recognition-2404.12979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trnet-two-level-refinement-network-leveraging-speech-enhancement-for-noise-robust-speech-emotion-recognition-2404.12979"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-generalization-in-audio-deepfake-detection-a-neural-collapse-based-sampling-and-training-approach-2404.13008</loc><lastmod>2024-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-generalization-in-audio-deepfake-detection-a-neural-collapse-based-sampling-and-training-approach-2404.13008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-generalization-in-audio-deepfake-detection-a-neural-collapse-based-sampling-and-training-approach-2404.13008"/></url>
<url><loc>https://scifaro.com/en/abs/track-role-prediction-of-single-instrumental-sequences-2404.13286</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/track-role-prediction-of-single-instrumental-sequences-2404.13286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/track-role-prediction-of-single-instrumental-sequences-2404.13286"/></url>
<url><loc>https://scifaro.com/en/abs/music-consistency-models-2404.13358</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-consistency-models-2404.13358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-consistency-models-2404.13358"/></url>
<url><loc>https://scifaro.com/en/abs/text-dependent-speaker-verification-tdsv-challenge-2024-challenge-evaluation-plan-2404.13428</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-dependent-speaker-verification-tdsv-challenge-2024-challenge-evaluation-plan-2404.13428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-dependent-speaker-verification-tdsv-challenge-2024-challenge-evaluation-plan-2404.13428"/></url>
<url><loc>https://scifaro.com/en/abs/mfhca-enhancing-speech-emotion-recognition-via-multi-spatial-fusion-and-hierarchical-cooperative-attention-2404.13509</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfhca-enhancing-speech-emotion-recognition-via-multi-spatial-fusion-and-hierarchical-cooperative-attention-2404.13509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfhca-enhancing-speech-emotion-recognition-via-multi-spatial-fusion-and-hierarchical-cooperative-attention-2404.13509"/></url>
<url><loc>https://scifaro.com/en/abs/audiorepinceptionnext-a-lightweight-single-stream-architecture-for-efficient-audio-recognition-2404.13551</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiorepinceptionnext-a-lightweight-single-stream-architecture-for-efficient-audio-recognition-2404.13551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiorepinceptionnext-a-lightweight-single-stream-architecture-for-efficient-audio-recognition-2404.13551"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-direction-of-arrival-estimation-method-based-on-vector-signal-reconstruction-with-a-single-vector-sensor-2404.13568</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-direction-of-arrival-estimation-method-based-on-vector-signal-reconstruction-with-a-single-vector-sensor-2404.13568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-direction-of-arrival-estimation-method-based-on-vector-signal-reconstruction-with-a-single-vector-sensor-2404.13568"/></url>
<url><loc>https://scifaro.com/en/abs/musical-word-embedding-for-music-tagging-and-retrieval-2404.13569</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-word-embedding-for-music-tagging-and-retrieval-2404.13569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-word-embedding-for-music-tagging-and-retrieval-2404.13569"/></url>
<url><loc>https://scifaro.com/en/abs/anchor-aware-deep-metric-learning-for-audio-visual-retrieval-2404.13789</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anchor-aware-deep-metric-learning-for-audio-visual-retrieval-2404.13789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anchor-aware-deep-metric-learning-for-audio-visual-retrieval-2404.13789"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-audio-deepfake-detection-2404.13892</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-audio-deepfake-detection-2404.13892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-audio-deepfake-detection-2404.13892"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-speech-deepfake-detection-2404.13914</loc><lastmod>2025-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-speech-deepfake-detection-2404.13914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-speech-deepfake-detection-2404.13914"/></url>
<url><loc>https://scifaro.com/en/abs/lvns-rave-diversified-audio-generation-with-rave-and-latent-vector-novelty-search-2404.14063</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lvns-rave-diversified-audio-generation-with-rave-and-latent-vector-novelty-search-2404.14063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lvns-rave-diversified-audio-generation-with-rave-and-latent-vector-novelty-search-2404.14063"/></url>
<url><loc>https://scifaro.com/en/abs/music-style-transfer-with-diffusion-model-2404.14771</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-style-transfer-with-diffusion-model-2404.14771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-style-transfer-with-diffusion-model-2404.14771"/></url>
<url><loc>https://scifaro.com/en/abs/storytts-a-highly-expressive-text-to-speech-dataset-with-rich-textual-expressiveness-annotations-2404.14946</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/storytts-a-highly-expressive-text-to-speech-dataset-with-rich-textual-expressiveness-annotations-2404.14946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/storytts-a-highly-expressive-text-to-speech-dataset-with-rich-textual-expressiveness-annotations-2404.14946"/></url>
<url><loc>https://scifaro.com/en/abs/every-breath-you-don-t-take-deepfake-speech-detection-using-breath-2404.15143</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/every-breath-you-don-t-take-deepfake-speech-detection-using-breath-2404.15143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/every-breath-you-don-t-take-deepfake-speech-detection-using-breath-2404.15143"/></url>
<url><loc>https://scifaro.com/en/abs/vector-signal-reconstruction-sparse-and-parametric-approach-of-direction-of-arrival-using-single-vector-hydrophone-2404.15160</loc><lastmod>2025-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-signal-reconstruction-sparse-and-parametric-approach-of-direction-of-arrival-using-single-vector-hydrophone-2404.15160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-signal-reconstruction-sparse-and-parametric-approach-of-direction-of-arrival-using-single-vector-hydrophone-2404.15160"/></url>
<url><loc>https://scifaro.com/en/abs/tailors-new-music-timbre-visualizer-to-entertain-music-through-imagery-2404.15181</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tailors-new-music-timbre-visualizer-to-entertain-music-through-imagery-2404.15181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tailors-new-music-timbre-visualizer-to-entertain-music-through-imagery-2404.15181"/></url>
<url><loc>https://scifaro.com/en/abs/hybridvc-efficient-voice-style-conversion-with-text-and-audio-prompts-2404.15637</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybridvc-efficient-voice-style-conversion-with-text-and-audio-prompts-2404.15637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybridvc-efficient-voice-style-conversion-with-text-and-audio-prompts-2404.15637"/></url>
<url><loc>https://scifaro.com/en/abs/an-experiment-with-electric-guitar-signals-for-exploring-the-virtuosity-based-on-the-entropy-of-music-2404.16259</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experiment-with-electric-guitar-signals-for-exploring-the-virtuosity-based-on-the-entropy-of-music-2404.16259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experiment-with-electric-guitar-signals-for-exploring-the-virtuosity-based-on-the-entropy-of-music-2404.16259"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-tropical-reef-bird-and-unrelated-sounds-for-superior-transfer-learning-in-marine-bioacoustics-2404.16436</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-tropical-reef-bird-and-unrelated-sounds-for-superior-transfer-learning-in-marine-bioacoustics-2404.16436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-tropical-reef-bird-and-unrelated-sounds-for-superior-transfer-learning-in-marine-bioacoustics-2404.16436"/></url>
<url><loc>https://scifaro.com/en/abs/the-thu-hcsi-multi-speaker-multi-lingual-few-shot-voice-cloning-system-for-limmits-24-challenge-2404.16619</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-thu-hcsi-multi-speaker-multi-lingual-few-shot-voice-cloning-system-for-limmits-24-challenge-2404.16619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-thu-hcsi-multi-speaker-multi-lingual-few-shot-voice-cloning-system-for-limmits-24-challenge-2404.16619"/></url>
<url><loc>https://scifaro.com/en/abs/cocola-coherence-oriented-contrastive-learning-of-musical-audio-representations-2404.16969</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cocola-coherence-oriented-contrastive-learning-of-musical-audio-representations-2404.16969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cocola-coherence-oriented-contrastive-learning-of-musical-audio-representations-2404.16969"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-differences-in-lab-quality-and-remote-recording-methods-with-dynamic-acoustic-measures-2404.17022</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-differences-in-lab-quality-and-remote-recording-methods-with-dynamic-acoustic-measures-2404.17022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-differences-in-lab-quality-and-remote-recording-methods-with-dynamic-acoustic-measures-2404.17022"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-time-frequency-representation-discriminators-for-high-fidelity-vocoder-2404.17161</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-time-frequency-representation-discriminators-for-high-fidelity-vocoder-2404.17161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-time-frequency-representation-discriminators-for-high-fidelity-vocoder-2404.17161"/></url>
<url><loc>https://scifaro.com/en/abs/device-feature-based-on-graph-fourier-transformation-with-logarithmic-processing-for-detection-of-replay-speech-attacks-2404.17280</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/device-feature-based-on-graph-fourier-transformation-with-logarithmic-processing-for-detection-of-replay-speech-attacks-2404.17280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/device-feature-based-on-graph-fourier-transformation-with-logarithmic-processing-for-detection-of-replay-speech-attacks-2404.17280"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-audio-from-silent-video-using-sequence-to-sequence-modeling-2404.17608</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-audio-from-silent-video-using-sequence-to-sequence-modeling-2404.17608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-audio-from-silent-video-using-sequence-to-sequence-modeling-2404.17608"/></url>
<url><loc>https://scifaro.com/en/abs/an-rfp-dataset-for-real-fake-and-partially-fake-audio-detection-2404.17721</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-rfp-dataset-for-real-fake-and-partially-fake-audio-detection-2404.17721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-rfp-dataset-for-real-fake-and-partially-fake-audio-detection-2404.17721"/></url>
<url><loc>https://scifaro.com/en/abs/t-clap-temporal-enhanced-contrastive-language-audio-pretraining-2404.17806</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-clap-temporal-enhanced-contrastive-language-audio-pretraining-2404.17806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-clap-temporal-enhanced-contrastive-language-audio-pretraining-2404.17806"/></url>
<url><loc>https://scifaro.com/en/abs/an-automatic-mixing-speech-enhancement-system-for-multi-track-audio-2404.17821</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automatic-mixing-speech-enhancement-system-for-multi-track-audio-2404.17821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automatic-mixing-speech-enhancement-system-for-multi-track-audio-2404.17821"/></url>
<url><loc>https://scifaro.com/en/abs/ti-asu-toward-robust-automatic-speech-understanding-through-text-to-speech-imputation-against-missing-speech-modality-2404.17983</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ti-asu-toward-robust-automatic-speech-understanding-through-text-to-speech-imputation-against-missing-speech-modality-2404.17983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ti-asu-toward-robust-automatic-speech-understanding-through-text-to-speech-imputation-against-missing-speech-modality-2404.17983"/></url>
<url><loc>https://scifaro.com/en/abs/towards-privacy-preserving-audio-classification-systems-2404.18002</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-privacy-preserving-audio-classification-systems-2404.18002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-privacy-preserving-audio-classification-systems-2404.18002"/></url>
<url><loc>https://scifaro.com/en/abs/composerx-multi-agent-symbolic-music-composition-with-llms-2404.18081</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composerx-multi-agent-symbolic-music-composition-with-llms-2404.18081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composerx-multi-agent-symbolic-music-composition-with-llms-2404.18081"/></url>
<url><loc>https://scifaro.com/en/abs/usat-a-universal-speaker-adaptive-text-to-speech-approach-2404.18094</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usat-a-universal-speaker-adaptive-text-to-speech-approach-2404.18094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usat-a-universal-speaker-adaptive-text-to-speech-approach-2404.18094"/></url>
<url><loc>https://scifaro.com/en/abs/pi-eces-de-viole-des-cinq-livres-and-their-statistical-signatures-the-musical-work-of-marin-marais-and-jordi-savall-2404.18355</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pi-eces-de-viole-des-cinq-livres-and-their-statistical-signatures-the-musical-work-of-marin-marais-and-jordi-savall-2404.18355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pi-eces-de-viole-des-cinq-livres-and-their-statistical-signatures-the-musical-work-of-marin-marais-and-jordi-savall-2404.18355"/></url>
<url><loc>https://scifaro.com/en/abs/a-systematic-evaluation-of-adversarial-attacks-against-speech-emotion-recognition-models-2404.18514</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-systematic-evaluation-of-adversarial-attacks-against-speech-emotion-recognition-models-2404.18514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-systematic-evaluation-of-adversarial-attacks-against-speech-emotion-recognition-models-2404.18514"/></url>
<url><loc>https://scifaro.com/en/abs/certification-of-speaker-recognition-models-to-additive-perturbations-2404.18791</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/certification-of-speaker-recognition-models-to-additive-perturbations-2404.18791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/certification-of-speaker-recognition-models-to-additive-perturbations-2404.18791"/></url>
<url><loc>https://scifaro.com/en/abs/contuner-singing-voice-beautifying-with-pitch-and-expressiveness-condition-2404.19187</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contuner-singing-voice-beautifying-with-pitch-and-expressiveness-condition-2404.19187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contuner-singing-voice-beautifying-with-pitch-and-expressiveness-condition-2404.19187"/></url>
<url><loc>https://scifaro.com/en/abs/ead-vc-enhancing-speech-auto-disentanglement-for-voice-conversion-with-ifub-estimator-and-joint-text-guided-consistent-learning-2404.19212</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ead-vc-enhancing-speech-auto-disentanglement-for-voice-conversion-with-ifub-estimator-and-joint-text-guided-consistent-learning-2404.19212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ead-vc-enhancing-speech-auto-disentanglement-for-voice-conversion-with-ifub-estimator-and-joint-text-guided-consistent-learning-2404.19212"/></url>
<url><loc>https://scifaro.com/en/abs/efficientasr-speech-recognition-network-compression-via-attention-redundancy-and-chunk-level-ffn-optimization-2404.19214</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficientasr-speech-recognition-network-compression-via-attention-redundancy-and-chunk-level-ffn-optimization-2404.19214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficientasr-speech-recognition-network-compression-via-attention-redundancy-and-chunk-level-ffn-optimization-2404.19214"/></url>
<url><loc>https://scifaro.com/en/abs/esc-efficient-speech-coding-with-cross-scale-residual-vector-quantized-transformers-2404.19441</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/esc-efficient-speech-coding-with-cross-scale-residual-vector-quantized-transformers-2404.19441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/esc-efficient-speech-coding-with-cross-scale-residual-vector-quantized-transformers-2404.19441"/></url>
<url><loc>https://scifaro.com/en/abs/semanticodec-an-ultra-low-bitrate-semantic-audio-codec-for-general-sound-2405.00233</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semanticodec-an-ultra-low-bitrate-semantic-audio-codec-for-general-sound-2405.00233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semanticodec-an-ultra-low-bitrate-semantic-audio-codec-for-general-sound-2405.00233"/></url>
<url><loc>https://scifaro.com/en/abs/who-is-authentic-speaker-2405.00248</loc><lastmod>2024-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-is-authentic-speaker-2405.00248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-is-authentic-speaker-2405.00248"/></url>
<url><loc>https://scifaro.com/en/abs/active-learning-with-task-adaptation-pre-training-for-speech-emotion-recognition-2405.00307</loc><lastmod>2024-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-learning-with-task-adaptation-pre-training-for-speech-emotion-recognition-2405.00307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-learning-with-task-adaptation-pre-training-for-speech-emotion-recognition-2405.00307"/></url>
<url><loc>https://scifaro.com/en/abs/learning-expressive-disentangled-speech-representations-with-soft-speech-units-and-adversarial-style-augmentation-2405.00603</loc><lastmod>2024-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-expressive-disentangled-speech-representations-with-soft-speech-units-and-adversarial-style-augmentation-2405.00603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-expressive-disentangled-speech-representations-with-soft-speech-units-and-adversarial-style-augmentation-2405.00603"/></url>
<url><loc>https://scifaro.com/en/abs/main-vc-lightweight-speech-representation-disentanglement-for-one-shot-voice-conversion-2405.00930</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/main-vc-lightweight-speech-representation-disentanglement-for-one-shot-voice-conversion-2405.00930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/main-vc-lightweight-speech-representation-disentanglement-for-one-shot-voice-conversion-2405.00930"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-models-in-speech-recognition-measuring-gpu-energy-consumption-impact-of-noise-and-model-quantization-for-edge-deployment-2405.01004</loc><lastmod>2024-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-models-in-speech-recognition-measuring-gpu-energy-consumption-impact-of-noise-and-model-quantization-for-edge-deployment-2405.01004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-models-in-speech-recognition-measuring-gpu-energy-consumption-impact-of-noise-and-model-quantization-for-edge-deployment-2405.01004"/></url>
<url><loc>https://scifaro.com/en/abs/tramba-a-hybrid-transformer-and-mamba-architecture-for-practical-audio-and-bone-conduction-speech-super-resolution-and-enhancement-on-mobile-and-wearable-platforms-2405.01242</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tramba-a-hybrid-transformer-and-mamba-architecture-for-practical-audio-and-bone-conduction-speech-super-resolution-and-enhancement-on-mobile-and-wearable-platforms-2405.01242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tramba-a-hybrid-transformer-and-mamba-architecture-for-practical-audio-and-bone-conduction-speech-super-resolution-and-enhancement-on-mobile-and-wearable-platforms-2405.01242"/></url>
<url><loc>https://scifaro.com/en/abs/toward-end-to-end-interpretable-convolutional-neural-networks-for-waveform-signals-2405.01815</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-end-to-end-interpretable-convolutional-neural-networks-for-waveform-signals-2405.01815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-end-to-end-interpretable-convolutional-neural-networks-for-waveform-signals-2405.01815"/></url>
<url><loc>https://scifaro.com/en/abs/joint-sentiment-analysis-of-lyrics-and-audio-in-music-2405.01988</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-sentiment-analysis-of-lyrics-and-audio-in-music-2405.01988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-sentiment-analysis-of-lyrics-and-audio-in-music-2405.01988"/></url>
<url><loc>https://scifaro.com/en/abs/envid-a-metric-learning-approach-for-forensic-few-shot-identification-of-unseen-environments-2405.02119</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/envid-a-metric-learning-approach-for-forensic-few-shot-identification-of-unseen-environments-2405.02119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/envid-a-metric-learning-approach-for-forensic-few-shot-identification-of-unseen-environments-2405.02119"/></url>
<url><loc>https://scifaro.com/en/abs/unveiling-the-potential-of-llm-based-asr-on-chinese-open-source-datasets-2405.02132</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unveiling-the-potential-of-llm-based-asr-on-chinese-open-source-datasets-2405.02132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unveiling-the-potential-of-llm-based-asr-on-chinese-open-source-datasets-2405.02132"/></url>
<url><loc>https://scifaro.com/en/abs/gmp-tl-gender-augmented-multi-scale-pseudo-label-enhanced-transfer-learning-for-speech-emotion-recognition-2405.02151</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gmp-tl-gender-augmented-multi-scale-pseudo-label-enhanced-transfer-learning-for-speech-emotion-recognition-2405.02151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gmp-tl-gender-augmented-multi-scale-pseudo-label-enhanced-transfer-learning-for-speech-emotion-recognition-2405.02151"/></url>
<url><loc>https://scifaro.com/en/abs/training-free-deepfake-voice-recognition-by-leveraging-large-scale-pre-trained-models-2405.02179</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-free-deepfake-voice-recognition-by-leveraging-large-scale-pre-trained-models-2405.02179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-free-deepfake-voice-recognition-by-leveraging-large-scale-pre-trained-models-2405.02179"/></url>
<url><loc>https://scifaro.com/en/abs/speech-technology-services-for-oral-history-research-2405.02333</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-technology-services-for-oral-history-research-2405.02333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-technology-services-for-oral-history-research-2405.02333"/></url>
<url><loc>https://scifaro.com/en/abs/quranic-audio-dataset-crowdsourced-and-labeled-recitation-from-non-arabic-speakers-2405.02675</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quranic-audio-dataset-crowdsourced-and-labeled-recitation-from-non-arabic-speakers-2405.02675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quranic-audio-dataset-crowdsourced-and-labeled-recitation-from-non-arabic-speakers-2405.02675"/></url>
<url><loc>https://scifaro.com/en/abs/mozart-s-touch-a-lightweight-multi-modal-music-generation-framework-based-on-pre-trained-large-models-2405.02801</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mozart-s-touch-a-lightweight-multi-modal-music-generation-framework-based-on-pre-trained-large-models-2405.02801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mozart-s-touch-a-lightweight-multi-modal-music-generation-framework-based-on-pre-trained-large-models-2405.02801"/></url>
<url><loc>https://scifaro.com/en/abs/sim2real-transfer-for-audio-visual-navigation-with-frequency-adaptive-acoustic-field-prediction-2405.02821</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sim2real-transfer-for-audio-visual-navigation-with-frequency-adaptive-acoustic-field-prediction-2405.02821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sim2real-transfer-for-audio-visual-navigation-with-frequency-adaptive-acoustic-field-prediction-2405.02821"/></url>
<url><loc>https://scifaro.com/en/abs/steered-response-power-for-sound-source-localization-a-tutorial-review-2405.02991</loc><lastmod>2024-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steered-response-power-for-sound-source-localization-a-tutorial-review-2405.02991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steered-response-power-for-sound-source-localization-a-tutorial-review-2405.02991"/></url>
<url><loc>https://scifaro.com/en/abs/repaugment-input-agnostic-representation-level-augmentation-for-respiratory-sound-classification-2405.02996</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repaugment-input-agnostic-representation-level-augmentation-for-respiratory-sound-classification-2405.02996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repaugment-input-agnostic-representation-level-augmentation-for-respiratory-sound-classification-2405.02996"/></url>
<url><loc>https://scifaro.com/en/abs/determined-multichannel-blind-source-separation-with-clustered-source-model-2405.03118</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/determined-multichannel-blind-source-separation-with-clustered-source-model-2405.03118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/determined-multichannel-blind-source-separation-with-clustered-source-model-2405.03118"/></url>
<url><loc>https://scifaro.com/en/abs/popdg-popular-3d-dance-generation-with-popdanceset-2405.03178</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/popdg-popular-3d-dance-generation-with-popdanceset-2405.03178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/popdg-popular-3d-dance-generation-with-popdanceset-2405.03178"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-aeroacoustic-wind-tunnel-studies-through-massive-channel-upscaling-with-mems-microphones-2405.03322</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-aeroacoustic-wind-tunnel-studies-through-massive-channel-upscaling-with-mems-microphones-2405.03322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-aeroacoustic-wind-tunnel-studies-through-massive-channel-upscaling-with-mems-microphones-2405.03322"/></url>
<url><loc>https://scifaro.com/en/abs/fully-reversing-the-shoebox-image-source-method-from-impulse-responses-to-room-parameters-2405.03385</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-reversing-the-shoebox-image-source-method-from-impulse-responses-to-room-parameters-2405.03385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-reversing-the-shoebox-image-source-method-from-impulse-responses-to-room-parameters-2405.03385"/></url>
<url><loc>https://scifaro.com/en/abs/whispy-adapting-stt-whisper-models-to-real-time-environments-2405.03484</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whispy-adapting-stt-whisper-models-to-real-time-environments-2405.03484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whispy-adapting-stt-whisper-models-to-real-time-environments-2405.03484"/></url>
<url><loc>https://scifaro.com/en/abs/deep-space-separable-distillation-for-lightweight-acoustic-scene-classification-2405.03567</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-space-separable-distillation-for-lightweight-acoustic-scene-classification-2405.03567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-space-separable-distillation-for-lightweight-acoustic-scene-classification-2405.03567"/></url>
<url><loc>https://scifaro.com/en/abs/hafformer-a-hierarchical-attention-free-framework-for-alzheimer-s-disease-detection-from-spontaneous-speech-2405.03952</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hafformer-a-hierarchical-attention-free-framework-for-alzheimer-s-disease-detection-from-spontaneous-speech-2405.03952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hafformer-a-hierarchical-attention-free-framework-for-alzheimer-s-disease-detection-from-spontaneous-speech-2405.03952"/></url>
<url><loc>https://scifaro.com/en/abs/intelligent-cardiac-auscultation-for-murmur-detection-via-parallel-attentive-models-with-uncertainty-estimation-2405.03953</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intelligent-cardiac-auscultation-for-murmur-detection-via-parallel-attentive-models-with-uncertainty-estimation-2405.03953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intelligent-cardiac-auscultation-for-murmur-detection-via-parallel-attentive-models-with-uncertainty-estimation-2405.03953"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-speech-emotion-representation-learning-based-on-dynamic-graph-2405.03956</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-speech-emotion-representation-learning-based-on-dynamic-graph-2405.03956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-speech-emotion-representation-learning-based-on-dynamic-graph-2405.03956"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-study-of-state-based-neural-networks-for-virtual-analog-audio-effects-modeling-2405.04124</loc><lastmod>2025-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-study-of-state-based-neural-networks-for-virtual-analog-audio-effects-modeling-2405.04124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-study-of-state-based-neural-networks-for-virtual-analog-audio-effects-modeling-2405.04124"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-music-deepfakes-is-easy-but-actually-hard-2405.04181</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-music-deepfakes-is-easy-but-actually-hard-2405.04181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-music-deepfakes-is-easy-but-actually-hard-2405.04181"/></url>
<url><loc>https://scifaro.com/en/abs/universal-spatial-audio-transcoder-2405.04471</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-spatial-audio-transcoder-2405.04471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-spatial-audio-transcoder-2405.04471"/></url>
<url><loc>https://scifaro.com/en/abs/the-codecfake-dataset-and-countermeasures-for-the-universally-detection-of-deepfake-audio-2405.04880</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-codecfake-dataset-and-countermeasures-for-the-universally-detection-of-deepfake-audio-2405.04880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-codecfake-dataset-and-countermeasures-for-the-universally-detection-of-deepfake-audio-2405.04880"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speech-pattern-disorders-in-autism-using-machine-learning-2405.05126</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speech-pattern-disorders-in-autism-using-machine-learning-2405.05126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speech-pattern-disorders-in-autism-using-machine-learning-2405.05126"/></url>
<url><loc>https://scifaro.com/en/abs/an-lstm-based-chord-generation-system-using-chroma-histogram-representations-2405.05240</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-lstm-based-chord-generation-system-using-chroma-histogram-representations-2405.05240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-lstm-based-chord-generation-system-using-chroma-histogram-representations-2405.05240"/></url>
<url><loc>https://scifaro.com/en/abs/afen-respiratory-disease-classification-using-ensemble-learning-2405.05467</loc><lastmod>2024-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/afen-respiratory-disease-classification-using-ensemble-learning-2405.05467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/afen-respiratory-disease-classification-using-ensemble-learning-2405.05467"/></url>
<url><loc>https://scifaro.com/en/abs/the-royalflush-automatic-speech-diarization-and-recognition-system-for-in-car-multi-channel-automatic-speech-recognition-challenge-2405.05498</loc><lastmod>2024-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-royalflush-automatic-speech-diarization-and-recognition-system-for-in-car-multi-channel-automatic-speech-recognition-challenge-2405.05498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-royalflush-automatic-speech-diarization-and-recognition-system-for-in-car-multi-channel-automatic-speech-recognition-challenge-2405.05498"/></url>
<url><loc>https://scifaro.com/en/abs/look-once-to-hear-target-speech-hearing-with-noisy-examples-2405.06289</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/look-once-to-hear-target-speech-hearing-with-noisy-examples-2405.06289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/look-once-to-hear-target-speech-hearing-with-noisy-examples-2405.06289"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-incorporating-mamba-for-speech-enhancement-2405.06573</loc><lastmod>2025-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-incorporating-mamba-for-speech-enhancement-2405.06573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-incorporating-mamba-for-speech-enhancement-2405.06573"/></url>
<url><loc>https://scifaro.com/en/abs/music-emotion-prediction-using-recurrent-neural-networks-2405.06747</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-emotion-prediction-using-recurrent-neural-networks-2405.06747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-emotion-prediction-using-recurrent-neural-networks-2405.06747"/></url>
<url><loc>https://scifaro.com/en/abs/time-of-arrival-estimation-and-phase-unwrapping-of-head-related-transfer-functions-with-integer-linear-programming-2405.06804</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-of-arrival-estimation-and-phase-unwrapping-of-head-related-transfer-functions-with-integer-linear-programming-2405.06804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-of-arrival-estimation-and-phase-unwrapping-of-head-related-transfer-functions-with-integer-linear-programming-2405.06804"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-cross-domain-audio-visual-deception-detection-2405.06995</loc><lastmod>2025-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-cross-domain-audio-visual-deception-detection-2405.06995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-cross-domain-audio-visual-deception-detection-2405.06995"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-of-text-dependent-speaker-verification-for-chinese-numerical-string-corpus-2405.07029</loc><lastmod>2024-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-of-text-dependent-speaker-verification-for-chinese-numerical-string-corpus-2405.07029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-of-text-dependent-speaker-verification-for-chinese-numerical-string-corpus-2405.07029"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-accessible-and-rapidly-trainable-rhythm-sequencer-using-a-generative-stacked-autoencoder-2405.07034</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-accessible-and-rapidly-trainable-rhythm-sequencer-using-a-generative-stacked-autoencoder-2405.07034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-accessible-and-rapidly-trainable-rhythm-sequencer-using-a-generative-stacked-autoencoder-2405.07034"/></url>
<url><loc>https://scifaro.com/en/abs/soccernet-echoes-a-soccer-game-audio-commentary-dataset-2405.07354</loc><lastmod>2025-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soccernet-echoes-a-soccer-game-audio-commentary-dataset-2405.07354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soccernet-echoes-a-soccer-game-audio-commentary-dataset-2405.07354"/></url>
<url><loc>https://scifaro.com/en/abs/rene-a-pre-trained-multi-modal-architecture-for-auscultation-of-respiratory-diseases-2405.07442</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rene-a-pre-trained-multi-modal-architecture-for-auscultation-of-respiratory-diseases-2405.07442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rene-a-pre-trained-multi-modal-architecture-for-auscultation-of-respiratory-diseases-2405.07442"/></url>
<url><loc>https://scifaro.com/en/abs/fastsag-towards-fast-non-autoregressive-singing-accompaniment-generation-2405.07682</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastsag-towards-fast-non-autoregressive-singing-accompaniment-generation-2405.07682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastsag-towards-fast-non-autoregressive-singing-accompaniment-generation-2405.07682"/></url>
<url><loc>https://scifaro.com/en/abs/diff-ets-learning-a-diffusion-probabilistic-model-for-electromyography-to-speech-conversion-2405.08021</loc><lastmod>2024-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-ets-learning-a-diffusion-probabilistic-model-for-electromyography-to-speech-conversion-2405.08021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-ets-learning-a-diffusion-probabilistic-model-for-electromyography-to-speech-conversion-2405.08021"/></url>
<url><loc>https://scifaro.com/en/abs/abnormal-respiratory-sound-identification-using-audio-spectrogram-vision-transformer-2405.08342</loc><lastmod>2024-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/abnormal-respiratory-sound-identification-using-audio-spectrogram-vision-transformer-2405.08342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/abnormal-respiratory-sound-identification-using-audio-spectrogram-vision-transformer-2405.08342"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-audio-deepfake-detection-a-evolving-benchmark-for-continual-learning-2405.08596</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-audio-deepfake-detection-a-evolving-benchmark-for-continual-learning-2405.08596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-audio-deepfake-detection-a-evolving-benchmark-for-continual-learning-2405.08596"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-design-choices-in-joint-embedding-predictive-architectures-for-general-audio-representation-learning-2405.08679</loc><lastmod>2024-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-design-choices-in-joint-embedding-predictive-architectures-for-general-audio-representation-learning-2405.08679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-design-choices-in-joint-embedding-predictive-architectures-for-general-audio-representation-learning-2405.08679"/></url>
<url><loc>https://scifaro.com/en/abs/polyglotfake-a-novel-multilingual-and-multimodal-deepfake-dataset-2405.08838</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyglotfake-a-novel-multilingual-and-multimodal-deepfake-dataset-2405.08838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyglotfake-a-novel-multilingual-and-multimodal-deepfake-dataset-2405.08838"/></url>
<url><loc>https://scifaro.com/en/abs/naturalistic-music-decoding-from-eeg-data-via-latent-diffusion-models-2405.09062</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturalistic-music-decoding-from-eeg-data-via-latent-diffusion-models-2405.09062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturalistic-music-decoding-from-eeg-data-via-latent-diffusion-models-2405.09062"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-emotion-prediction-and-control-in-text-to-speech-synthesis-2405.09171</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-emotion-prediction-and-control-in-text-to-speech-synthesis-2405.09171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-emotion-prediction-and-control-in-text-to-speech-synthesis-2405.09171"/></url>
<url><loc>https://scifaro.com/en/abs/perception-inspired-graph-convolution-for-music-understanding-tasks-2405.09224</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perception-inspired-graph-convolution-for-music-understanding-tasks-2405.09224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perception-inspired-graph-convolution-for-music-understanding-tasks-2405.09224"/></url>
<url><loc>https://scifaro.com/en/abs/smug-explain-a-framework-for-symbolic-music-graph-explanations-2405.09241</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smug-explain-a-framework-for-symbolic-music-graph-explanations-2405.09241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smug-explain-a-framework-for-symbolic-music-graph-explanations-2405.09241"/></url>
<url><loc>https://scifaro.com/en/abs/towards-evaluating-the-robustness-of-automatic-speech-recognition-systems-via-audio-style-transfer-2405.09470</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-evaluating-the-robustness-of-automatic-speech-recognition-systems-via-audio-style-transfer-2405.09470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-evaluating-the-robustness-of-automatic-speech-recognition-systems-via-audio-style-transfer-2405.09470"/></url>
<url><loc>https://scifaro.com/en/abs/whole-song-hierarchical-generation-of-symbolic-music-using-cascaded-diffusion-models-2405.09901</loc><lastmod>2024-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whole-song-hierarchical-generation-of-symbolic-music-using-cascaded-diffusion-models-2405.09901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whole-song-hierarchical-generation-of-symbolic-music-using-cascaded-diffusion-models-2405.09901"/></url>
<url><loc>https://scifaro.com/en/abs/building-a-luganda-text-to-speech-model-from-crowdsourced-data-2405.10211</loc><lastmod>2024-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-a-luganda-text-to-speech-model-from-crowdsourced-data-2405.10211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-a-luganda-text-to-speech-model-from-crowdsourced-data-2405.10211"/></url>
<url><loc>https://scifaro.com/en/abs/dac-jax-a-jax-implementation-of-the-descript-audio-codec-2405.11554</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dac-jax-a-jax-implementation-of-the-descript-audio-codec-2405.11554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dac-jax-a-jax-implementation-of-the-descript-audio-codec-2405.11554"/></url>
<url><loc>https://scifaro.com/en/abs/neighborhood-attention-transformer-with-progressive-channel-fusion-for-speaker-verification-2405.12031</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neighborhood-attention-transformer-with-progressive-channel-fusion-for-speaker-verification-2405.12031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neighborhood-attention-transformer-with-progressive-channel-fusion-for-speaker-verification-2405.12031"/></url>
<url><loc>https://scifaro.com/en/abs/symplex-controllable-symbolic-music-generation-using-simplex-diffusion-with-vocabulary-priors-2405.12666</loc><lastmod>2024-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symplex-controllable-symbolic-music-generation-using-simplex-diffusion-with-vocabulary-priors-2405.12666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symplex-controllable-symbolic-music-generation-using-simplex-diffusion-with-vocabulary-priors-2405.12666"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-the-analysis-of-murine-neonatal-ultrasonic-vocalizations-development-evaluation-and-application-of-different-mathematical-models-2405.12957</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-the-analysis-of-murine-neonatal-ultrasonic-vocalizations-development-evaluation-and-application-of-different-mathematical-models-2405.12957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-the-analysis-of-murine-neonatal-ultrasonic-vocalizations-development-evaluation-and-application-of-different-mathematical-models-2405.12957"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-real-time-accent-conversion-model-with-voice-cloning-2405.13162</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-real-time-accent-conversion-model-with-voice-cloning-2405.13162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-real-time-accent-conversion-model-with-voice-cloning-2405.13162"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonizer-neural-upmixing-as-spherical-harmonics-generation-2405.13428</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonizer-neural-upmixing-as-spherical-harmonics-generation-2405.13428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonizer-neural-upmixing-as-spherical-harmonics-generation-2405.13428"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-real-world-polyphonic-piano-audio-to-score-transcription-with-hierarchical-decoding-2405.13527</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-real-world-polyphonic-piano-audio-to-score-transcription-with-hierarchical-decoding-2405.13527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-real-world-polyphonic-piano-audio-to-score-transcription-with-hierarchical-decoding-2405.13527"/></url>
<url><loc>https://scifaro.com/en/abs/audio-mamba-pretrained-audio-state-space-model-for-audio-tagging-2405.13636</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-mamba-pretrained-audio-state-space-model-for-audio-tagging-2405.13636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-mamba-pretrained-audio-state-space-model-for-audio-tagging-2405.13636"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-perception-representation-and-its-neuroscientific-exploration-a-comprehensive-review-2405.13661</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-perception-representation-and-its-neuroscientific-exploration-a-comprehensive-review-2405.13661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-perception-representation-and-its-neuroscientific-exploration-a-comprehensive-review-2405.13661"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-sound-field-from-the-perspective-of-band-limited-functions-2405.14290</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-sound-field-from-the-perspective-of-band-limited-functions-2405.14290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-sound-field-from-the-perspective-of-band-limited-functions-2405.14290"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-user-defined-keyword-spotting-using-shifted-delta-coefficients-2405.14489</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-user-defined-keyword-spotting-using-shifted-delta-coefficients-2405.14489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-user-defined-keyword-spotting-using-shifted-delta-coefficients-2405.14489"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-real-electric-guitar-tones-and-effects-to-improve-robustness-in-guitar-tablature-transcription-modeling-2405.14679</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-real-electric-guitar-tones-and-effects-to-improve-robustness-in-guitar-tablature-transcription-modeling-2405.14679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-real-electric-guitar-tones-and-effects-to-improve-robustness-in-guitar-tablature-transcription-modeling-2405.14679"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-training-an-ai-model-2405.15096</loc><lastmod>2024-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-training-an-ai-model-2405.15096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-training-an-ai-model-2405.15096"/></url>
<url><loc>https://scifaro.com/en/abs/the-rarity-of-musical-audio-signals-within-the-space-of-possible-audio-generation-2405.15103</loc><lastmod>2024-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-rarity-of-musical-audio-signals-within-the-space-of-possible-audio-generation-2405.15103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-rarity-of-musical-audio-signals-within-the-space-of-possible-audio-generation-2405.15103"/></url>
<url><loc>https://scifaro.com/en/abs/soundlocd-an-efficient-conditional-discrete-contrastive-latent-diffusion-model-for-text-to-sound-generation-2405.15338</loc><lastmod>2024-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundlocd-an-efficient-conditional-discrete-contrastive-latent-diffusion-model-for-text-to-sound-generation-2405.15338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundlocd-an-efficient-conditional-discrete-contrastive-latent-diffusion-model-for-text-to-sound-generation-2405.15338"/></url>
<url><loc>https://scifaro.com/en/abs/hiddenspeaker-generate-imperceptible-unlearnable-audios-for-speaker-verification-system-2405.15655</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hiddenspeaker-generate-imperceptible-unlearnable-audios-for-speaker-verification-system-2405.15655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hiddenspeaker-generate-imperceptible-unlearnable-audios-for-speaker-verification-system-2405.15655"/></url>
<url><loc>https://scifaro.com/en/abs/quality-aware-masked-diffusion-transformer-for-enhanced-music-generation-2405.15863</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quality-aware-masked-diffusion-transformer-for-enhanced-music-generation-2405.15863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quality-aware-masked-diffusion-transformer-for-enhanced-music-generation-2405.15863"/></url>
<url><loc>https://scifaro.com/en/abs/carnatic-raga-identification-system-using-rigorous-time-delay-neural-network-2405.16000</loc><lastmod>2024-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/carnatic-raga-identification-system-using-rigorous-time-delay-neural-network-2405.16000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/carnatic-raga-identification-system-using-rigorous-time-delay-neural-network-2405.16000"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-the-charlie-parker-omnibook-using-an-audio-to-score-automatic-transcription-pipeline-2405.16687</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-the-charlie-parker-omnibook-using-an-audio-to-score-automatic-transcription-pipeline-2405.16687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-the-charlie-parker-omnibook-using-an-audio-to-score-automatic-transcription-pipeline-2405.16687"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-voice-activity-detection-based-on-lightweight-neural-2405.16797</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-voice-activity-detection-based-on-lightweight-neural-2405.16797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-voice-activity-detection-based-on-lightweight-neural-2405.16797"/></url>
<url><loc>https://scifaro.com/en/abs/rset-remapping-based-sorting-method-for-emotion-transfer-speech-synthesis-2405.17028</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rset-remapping-based-sorting-method-for-emotion-transfer-speech-synthesis-2405.17028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rset-remapping-based-sorting-method-for-emotion-transfer-speech-synthesis-2405.17028"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-fusion-architecture-for-pd-detection-using-semi-supervised-speech-embeddings-2405.17206</loc><lastmod>2024-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-fusion-architecture-for-pd-detection-using-semi-supervised-speech-embeddings-2405.17206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-fusion-architecture-for-pd-detection-using-semi-supervised-speech-embeddings-2405.17206"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-music-genre-classification-through-multi-algorithm-analysis-and-user-friendly-visualization-2405.17413</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-music-genre-classification-through-multi-algorithm-analysis-and-user-friendly-visualization-2405.17413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-music-genre-classification-through-multi-algorithm-analysis-and-user-friendly-visualization-2405.17413"/></url>
<url><loc>https://scifaro.com/en/abs/listenable-maps-for-zero-shot-audio-classifiers-2405.17615</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listenable-maps-for-zero-shot-audio-classifiers-2405.17615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listenable-maps-for-zero-shot-audio-classifiers-2405.17615"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-centric-framework-for-machine-listening-projects-addressing-large-scale-data-acquisition-and-labeling-through-active-learning-2405.18153</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-centric-framework-for-machine-listening-projects-addressing-large-scale-data-acquisition-and-labeling-through-active-learning-2405.18153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-centric-framework-for-machine-listening-projects-addressing-large-scale-data-acquisition-and-labeling-through-active-learning-2405.18153"/></url>
<url><loc>https://scifaro.com/en/abs/neraf-3d-scene-infused-neural-radiance-and-acoustic-fields-2405.18213</loc><lastmod>2025-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neraf-3d-scene-infused-neural-radiance-and-acoustic-fields-2405.18213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neraf-3d-scene-infused-neural-radiance-and-acoustic-fields-2405.18213"/></url>
<url><loc>https://scifaro.com/en/abs/instruct-musicgen-unlocking-text-to-music-editing-for-music-language-models-via-instruction-tuning-2405.18386</loc><lastmod>2025-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instruct-musicgen-unlocking-text-to-music-editing-for-music-language-models-via-instruction-tuning-2405.18386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instruct-musicgen-unlocking-text-to-music-editing-for-music-language-models-via-instruction-tuning-2405.18386"/></url>
<url><loc>https://scifaro.com/en/abs/soundctm-unifying-score-based-and-consistency-models-for-full-band-text-to-sound-generation-2405.18503</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundctm-unifying-score-based-and-consistency-models-for-full-band-text-to-sound-generation-2405.18503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundctm-unifying-score-based-and-consistency-models-for-full-band-text-to-sound-generation-2405.18503"/></url>
<url><loc>https://scifaro.com/en/abs/reverse-the-auditory-processing-pathway-coarse-to-fine-audio-reconstruction-from-fmri-2405.18726</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverse-the-auditory-processing-pathway-coarse-to-fine-audio-reconstruction-from-fmri-2405.18726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverse-the-auditory-processing-pathway-coarse-to-fine-audio-reconstruction-from-fmri-2405.18726"/></url>
<url><loc>https://scifaro.com/en/abs/sonos-voice-control-bias-assessment-dataset-a-methodology-for-demographic-bias-assessment-in-voice-assistants-2405.19342</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonos-voice-control-bias-assessment-dataset-a-methodology-for-demographic-bias-assessment-in-voice-assistants-2405.19342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonos-voice-control-bias-assessment-dataset-a-methodology-for-demographic-bias-assessment-in-voice-assistants-2405.19342"/></url>
<url><loc>https://scifaro.com/en/abs/luganda-speech-intent-recognition-for-iot-applications-2405.19343</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/luganda-speech-intent-recognition-for-iot-applications-2405.19343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/luganda-speech-intent-recognition-for-iot-applications-2405.19343"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-attribute-based-speaker-verification-2405.19796</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-attribute-based-speaker-verification-2405.19796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-attribute-based-speaker-verification-2405.19796"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-mapping-of-singing-voices-u-net-assisted-vocal-segmentation-2405.20059</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-mapping-of-singing-voices-u-net-assisted-vocal-segmentation-2405.20059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-mapping-of-singing-voices-u-net-assisted-vocal-segmentation-2405.20059"/></url>
<url><loc>https://scifaro.com/en/abs/is-self-supervised-learning-enough-to-fill-in-the-gap-a-study-on-speech-inpainting-2405.20101</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-self-supervised-learning-enough-to-fill-in-the-gap-a-study-on-speech-inpainting-2405.20101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-self-supervised-learning-enough-to-fill-in-the-gap-a-study-on-speech-inpainting-2405.20101"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-feature-boosting-for-explainable-speech-emotion-recognition-2405.20172</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-feature-boosting-for-explainable-speech-emotion-recognition-2405.20172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-feature-boosting-for-explainable-speech-emotion-recognition-2405.20172"/></url>
<url><loc>https://scifaro.com/en/abs/ditto-2-distilled-diffusion-inference-time-t-optimization-for-music-generation-2405.20289</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditto-2-distilled-diffusion-inference-time-t-optimization-for-music-generation-2405.20289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditto-2-distilled-diffusion-inference-time-t-optimization-for-music-generation-2405.20289"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-dataset-sampling-rate-for-noise-cancellation-through-deep-learning-2405.20884</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-dataset-sampling-rate-for-noise-cancellation-through-deep-learning-2405.20884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-dataset-sampling-rate-for-noise-cancellation-through-deep-learning-2405.20884"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-condition-monitoring-of-bolted-joints-through-acoustic-emission-and-deep-transfer-learning-generalization-ordinal-loss-and-super-convergence-2405.20887</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-condition-monitoring-of-bolted-joints-through-acoustic-emission-and-deep-transfer-learning-generalization-ordinal-loss-and-super-convergence-2405.20887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-condition-monitoring-of-bolted-joints-through-acoustic-emission-and-deep-transfer-learning-generalization-ordinal-loss-and-super-convergence-2405.20887"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-deep-learning-audio-generation-methods-2406.00146</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-deep-learning-audio-generation-methods-2406.00146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-deep-learning-audio-generation-methods-2406.00146"/></url>
<url><loc>https://scifaro.com/en/abs/creative-text-to-audio-generation-via-synthesizer-programming-2406.00294</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creative-text-to-audio-generation-via-synthesizer-programming-2406.00294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creative-text-to-audio-generation-via-synthesizer-programming-2406.00294"/></url>
<url><loc>https://scifaro.com/en/abs/frieren-efficient-video-to-audio-generation-network-with-rectified-flow-matching-2406.00320</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frieren-efficient-video-to-audio-generation-network-with-rectified-flow-matching-2406.00320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frieren-efficient-video-to-audio-generation-network-with-rectified-flow-matching-2406.00320"/></url>
<url><loc>https://scifaro.com/en/abs/recent-advances-in-end-to-end-simultaneous-speech-translation-2406.00497</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-advances-in-end-to-end-simultaneous-speech-translation-2406.00497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-advances-in-end-to-end-simultaneous-speech-translation-2406.00497"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-heart-sound-classification-using-mel-frequency-cepstral-coefficients-and-comparative-analysis-of-single-vs-ensemble-classifier-strategies-2406.00702</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-heart-sound-classification-using-mel-frequency-cepstral-coefficients-and-comparative-analysis-of-single-vs-ensemble-classifier-strategies-2406.00702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-heart-sound-classification-using-mel-frequency-cepstral-coefficients-and-comparative-analysis-of-single-vs-ensemble-classifier-strategies-2406.00702"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-error-analysis-of-raw-waveform-acoustic-models-with-parametric-and-non-parametric-cnns-2406.00898</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-error-analysis-of-raw-waveform-acoustic-models-with-parametric-and-non-parametric-cnns-2406.00898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-error-analysis-of-raw-waveform-acoustic-models-with-parametric-and-non-parametric-cnns-2406.00898"/></url>
<url><loc>https://scifaro.com/en/abs/searching-for-music-mixing-graphs-a-pruning-approach-2406.01049</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/searching-for-music-mixing-graphs-a-pruning-approach-2406.01049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/searching-for-music-mixing-graphs-a-pruning-approach-2406.01049"/></url>
<url><loc>https://scifaro.com/en/abs/animal2vec-and-meerkat-a-self-supervised-transformer-for-rare-event-raw-audio-input-and-a-large-scale-reference-dataset-for-bioacoustics-2406.01253</loc><lastmod>2026-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/animal2vec-and-meerkat-a-self-supervised-transformer-for-rare-event-raw-audio-input-and-a-large-scale-reference-dataset-for-bioacoustics-2406.01253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/animal2vec-and-meerkat-a-self-supervised-transformer-for-rare-event-raw-audio-input-and-a-large-scale-reference-dataset-for-bioacoustics-2406.01253"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-multi-modal-speech-in-painting-2406.01321</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-multi-modal-speech-in-painting-2406.01321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-multi-modal-speech-in-painting-2406.01321"/></url>
<url><loc>https://scifaro.com/en/abs/tinysv-speaker-verification-in-tinyml-with-on-device-learning-2406.01655</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tinysv-speaker-verification-in-tinyml-with-on-device-learning-2406.01655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tinysv-speaker-verification-in-tinyml-with-on-device-learning-2406.01655"/></url>
<url><loc>https://scifaro.com/en/abs/masksr-masked-language-model-for-full-band-speech-restoration-2406.02092</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masksr-masked-language-model-for-full-band-speech-restoration-2406.02092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masksr-masked-language-model-for-full-band-speech-restoration-2406.02092"/></url>
<url><loc>https://scifaro.com/en/abs/whistle-data-efficient-multilingual-and-crosslingual-speech-recognition-via-weakly-phonetic-supervision-2406.02166</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whistle-data-efficient-multilingual-and-crosslingual-speech-recognition-via-weakly-phonetic-supervision-2406.02166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whistle-data-efficient-multilingual-and-crosslingual-speech-recognition-via-weakly-phonetic-supervision-2406.02166"/></url>
<url><loc>https://scifaro.com/en/abs/audio-mamba-selective-state-spaces-for-self-supervised-audio-representations-2406.02178</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-mamba-selective-state-spaces-for-self-supervised-audio-representations-2406.02178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-mamba-selective-state-spaces-for-self-supervised-audio-representations-2406.02178"/></url>
<url><loc>https://scifaro.com/en/abs/an-independence-promoting-loss-for-music-generation-with-language-models-2406.02315</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-independence-promoting-loss-for-music-generation-with-language-models-2406.02315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-independence-promoting-loss-for-music-generation-with-language-models-2406.02315"/></url>
<url><loc>https://scifaro.com/en/abs/simplespeech-towards-simple-and-efficient-text-to-speech-with-scalar-latent-transformer-diffusion-models-2406.02328</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simplespeech-towards-simple-and-efficient-text-to-speech-with-scalar-latent-transformer-diffusion-models-2406.02328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simplespeech-towards-simple-and-efficient-text-to-speech-with-scalar-latent-transformer-diffusion-models-2406.02328"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-models-in-peer-to-peer-learning-a-practical-application-2406.02565</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-models-in-peer-to-peer-learning-a-practical-application-2406.02565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-models-in-peer-to-peer-learning-a-practical-application-2406.02565"/></url>
<url><loc>https://scifaro.com/en/abs/livespeech-low-latency-zero-shot-text-to-speech-via-autoregressive-modeling-of-audio-discrete-codes-2406.02897</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/livespeech-low-latency-zero-shot-text-to-speech-via-autoregressive-modeling-of-audio-discrete-codes-2406.02897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/livespeech-low-latency-zero-shot-text-to-speech-via-autoregressive-modeling-of-audio-discrete-codes-2406.02897"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-index-collapse-of-large-codebook-speech-tokenizer-with-dual-decoding-product-quantized-variational-auto-encoder-2406.02940</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-index-collapse-of-large-codebook-speech-tokenizer-with-dual-decoding-product-quantized-variational-auto-encoder-2406.02940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-index-collapse-of-large-codebook-speech-tokenizer-with-dual-decoding-product-quantized-variational-auto-encoder-2406.02940"/></url>
<url><loc>https://scifaro.com/en/abs/dataset-distillation-generative-model-for-speech-emotion-recognition-2406.02963</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dataset-distillation-generative-model-for-speech-emotion-recognition-2406.02963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dataset-distillation-generative-model-for-speech-emotion-recognition-2406.02963"/></url>
<url><loc>https://scifaro.com/en/abs/an-interpretable-speech-foundation-model-for-depression-detection-by-revealing-prediction-relevant-acoustic-features-from-long-speech-2406.03138</loc><lastmod>2026-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-interpretable-speech-foundation-model-for-depression-detection-by-revealing-prediction-relevant-acoustic-features-from-long-speech-2406.03138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-interpretable-speech-foundation-model-for-depression-detection-by-revealing-prediction-relevant-acoustic-features-from-long-speech-2406.03138"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-fake-audio-detection-via-deep-stable-learning-2406.03237</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-fake-audio-detection-via-deep-stable-learning-2406.03237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-fake-audio-detection-via-deep-stable-learning-2406.03237"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-source-tracing-detecting-novel-audio-deepfake-algorithm-with-real-emphasis-and-fake-dispersion-strategy-2406.03240</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-source-tracing-detecting-novel-audio-deepfake-algorithm-with-real-emphasis-and-fake-dispersion-strategy-2406.03240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-source-tracing-detecting-novel-audio-deepfake-algorithm-with-real-emphasis-and-fake-dispersion-strategy-2406.03240"/></url>
<url><loc>https://scifaro.com/en/abs/genuine-focused-learning-using-mask-autoencoder-for-generalized-fake-audio-detection-2406.03247</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/genuine-focused-learning-using-mask-autoencoder-for-generalized-fake-audio-detection-2406.03247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/genuine-focused-learning-using-mask-autoencoder-for-generalized-fake-audio-detection-2406.03247"/></url>
<url><loc>https://scifaro.com/en/abs/asobo-attentive-beamformer-selection-for-distant-speaker-diarization-in-meetings-2406.03251</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asobo-attentive-beamformer-selection-for-distant-speaker-diarization-in-meetings-2406.03251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asobo-attentive-beamformer-selection-for-distant-speaker-diarization-in-meetings-2406.03251"/></url>
<url><loc>https://scifaro.com/en/abs/audio-mamba-bidirectional-state-space-model-for-audio-representation-learning-2406.03344</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-mamba-bidirectional-state-space-model-for-audio-representation-learning-2406.03344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-mamba-bidirectional-state-space-model-for-audio-representation-learning-2406.03344"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-clinical-depression-screening-an-empirical-study-2406.03510</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-clinical-depression-screening-an-empirical-study-2406.03510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-clinical-depression-screening-an-empirical-study-2406.03510"/></url>
<url><loc>https://scifaro.com/en/abs/harder-or-different-understanding-generalization-of-audio-deepfake-detection-2406.03512</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harder-or-different-understanding-generalization-of-audio-deepfake-detection-2406.03512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harder-or-different-understanding-generalization-of-audio-deepfake-detection-2406.03512"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-codec-based-zero-shot-text-to-speech-synthesis-with-multi-modal-context-and-large-language-model-2406.03706</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-codec-based-zero-shot-text-to-speech-synthesis-with-multi-modal-context-and-large-language-model-2406.03706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-codec-based-zero-shot-text-to-speech-synthesis-with-multi-modal-context-and-large-language-model-2406.03706"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-generation-in-prompt-based-text-to-speech-synthesis-with-context-aware-contrastive-language-audio-pretraining-2406.03714</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-generation-in-prompt-based-text-to-speech-synthesis-with-context-aware-contrastive-language-audio-pretraining-2406.03714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-generation-in-prompt-based-text-to-speech-synthesis-with-context-aware-contrastive-language-audio-pretraining-2406.03714"/></url>
<url><loc>https://scifaro.com/en/abs/silentcipher-deep-audio-watermarking-2406.03822</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/silentcipher-deep-audio-watermarking-2406.03822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/silentcipher-deep-audio-watermarking-2406.03822"/></url>
<url><loc>https://scifaro.com/en/abs/strada-a-singer-traits-dataset-2406.04140</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/strada-a-singer-traits-dataset-2406.04140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/strada-a-singer-traits-dataset-2406.04140"/></url>
<url><loc>https://scifaro.com/en/abs/prompt-guided-precise-audio-editing-with-diffusion-models-2406.04350</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompt-guided-precise-audio-editing-with-diffusion-models-2406.04350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompt-guided-precise-audio-editing-with-diffusion-models-2406.04350"/></url>
<url><loc>https://scifaro.com/en/abs/muse-flexible-voiceprint-receptive-fields-and-multi-path-fusion-enhanced-taylor-transformer-for-u-net-based-speech-enhancement-2406.04589</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muse-flexible-voiceprint-receptive-fields-and-multi-path-fusion-enhanced-taylor-transformer-for-u-net-based-speech-enhancement-2406.04589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muse-flexible-voiceprint-receptive-fields-and-multi-path-fusion-enhanced-taylor-transformer-for-u-net-based-speech-enhancement-2406.04589"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-aware-rnn-t-for-mandarin-chinese-mispronunciation-detection-and-diagnosis-2406.04595</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-aware-rnn-t-for-mandarin-chinese-mispronunciation-detection-and-diagnosis-2406.04595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-aware-rnn-t-for-mandarin-chinese-mispronunciation-detection-and-diagnosis-2406.04595"/></url>
<url><loc>https://scifaro.com/en/abs/pppr-portable-plug-in-prompt-refiner-for-text-to-audio-generation-2406.04683</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pppr-portable-plug-in-prompt-refiner-for-text-to-audio-generation-2406.04683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pppr-portable-plug-in-prompt-refiner-for-text-to-audio-generation-2406.04683"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-smoothed-knn-speaker-adaptation-for-end-to-end-asr-2406.04791</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-smoothed-knn-speaker-adaptation-for-end-to-end-asr-2406.04791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-smoothed-knn-speaker-adaptation-for-end-to-end-asr-2406.04791"/></url>
<url><loc>https://scifaro.com/en/abs/traceablespeech-towards-proactively-traceable-text-to-speech-with-watermarking-2406.04840</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/traceablespeech-towards-proactively-traceable-text-to-speech-with-watermarking-2406.04840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/traceablespeech-towards-proactively-traceable-text-to-speech-with-watermarking-2406.04840"/></url>
<url><loc>https://scifaro.com/en/abs/daisy-data-adaptive-self-supervised-early-exit-for-speech-representation-models-2406.05464</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/daisy-data-adaptive-self-supervised-early-exit-for-speech-representation-models-2406.05464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/daisy-data-adaptive-self-supervised-early-exit-for-speech-representation-models-2406.05464"/></url>
<url><loc>https://scifaro.com/en/abs/mmm-whatcha-say-uncovering-distal-and-proximal-context-effects-in-first-and-second-language-word-perception-using-psychophysical-reverse-correlation-2406.05515</loc><lastmod>2025-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmm-whatcha-say-uncovering-distal-and-proximal-context-effects-in-first-and-second-language-word-perception-using-psychophysical-reverse-correlation-2406.05515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmm-whatcha-say-uncovering-distal-and-proximal-context-effects-in-first-and-second-language-word-perception-using-psychophysical-reverse-correlation-2406.05515"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-benefits-of-tokenization-of-discrete-acoustic-units-2406.05547</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-benefits-of-tokenization-of-discrete-acoustic-units-2406.05547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-benefits-of-tokenization-of-discrete-acoustic-units-2406.05547"/></url>
<url><loc>https://scifaro.com/en/abs/heart-sound-segmentation-using-deep-learning-techniques-2406.05653</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heart-sound-segmentation-using-deep-learning-techniques-2406.05653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heart-sound-segmentation-using-deep-learning-techniques-2406.05653"/></url>
<url><loc>https://scifaro.com/en/abs/towards-expressive-zero-shot-speech-synthesis-with-hierarchical-prosody-modeling-2406.05681</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-expressive-zero-shot-speech-synthesis-with-hierarchical-prosody-modeling-2406.05681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-expressive-zero-shot-speech-synthesis-with-hierarchical-prosody-modeling-2406.05681"/></url>
<url><loc>https://scifaro.com/en/abs/spa-svc-self-supervised-pitch-augmentation-for-singing-voice-conversion-2406.05692</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spa-svc-self-supervised-pitch-augmentation-for-singing-voice-conversion-2406.05692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spa-svc-self-supervised-pitch-augmentation-for-singing-voice-conversion-2406.05692"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-multi-stuttered-speech-classification-leveraging-whisper-s-encoder-for-efficient-parameter-reduction-in-automated-assessment-2406.05784</loc><lastmod>2025-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-multi-stuttered-speech-classification-leveraging-whisper-s-encoder-for-efficient-parameter-reduction-in-automated-assessment-2406.05784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-multi-stuttered-speech-classification-leveraging-whisper-s-encoder-for-efficient-parameter-reduction-in-automated-assessment-2406.05784"/></url>
<url><loc>https://scifaro.com/en/abs/source-free-domain-adaptation-for-speaker-verification-in-data-scarce-languages-and-noisy-channels-2406.05863</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-free-domain-adaptation-for-speaker-verification-in-data-scarce-languages-and-noisy-channels-2406.05863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-free-domain-adaptation-for-speaker-verification-in-data-scarce-languages-and-noisy-channels-2406.05863"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-from-synthetic-audio-doppelg-angers-2406.05923</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-from-synthetic-audio-doppelg-angers-2406.05923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-from-synthetic-audio-doppelg-angers-2406.05923"/></url>
<url><loc>https://scifaro.com/en/abs/rawbmamba-end-to-end-bidirectional-state-space-model-for-audio-deepfake-detection-2406.06086</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rawbmamba-end-to-end-bidirectional-state-space-model-for-audio-deepfake-detection-2406.06086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rawbmamba-end-to-end-bidirectional-state-space-model-for-audio-deepfake-detection-2406.06086"/></url>
<url><loc>https://scifaro.com/en/abs/streamatt-direct-streaming-speech-to-text-translation-with-attention-based-audio-history-selection-2406.06097</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamatt-direct-streaming-speech-to-text-translation-with-attention-based-audio-history-selection-2406.06097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamatt-direct-streaming-speech-to-text-translation-with-attention-based-audio-history-selection-2406.06097"/></url>
<url><loc>https://scifaro.com/en/abs/thunder-unified-regression-diffusion-speech-enhancement-with-a-single-reverse-step-using-brownian-bridge-2406.06139</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thunder-unified-regression-diffusion-speech-enhancement-with-a-single-reverse-step-using-brownian-bridge-2406.06139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thunder-unified-regression-diffusion-speech-enhancement-with-a-single-reverse-step-using-brownian-bridge-2406.06139"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-the-effect-of-speech-pathology-on-automatic-and-human-speaker-verification-2406.06208</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-the-effect-of-speech-pathology-on-automatic-and-human-speaker-verification-2406.06208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-the-effect-of-speech-pathology-on-automatic-and-human-speaker-verification-2406.06208"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-captioning-using-soft-and-hard-prompts-2406.06295</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-using-soft-and-hard-prompts-2406.06295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-using-soft-and-hard-prompts-2406.06295"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-improved-mvdr-beamforming-for-sound-enhancement-2406.06310</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-improved-mvdr-beamforming-for-sound-enhancement-2406.06310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-improved-mvdr-beamforming-for-sound-enhancement-2406.06310"/></url>
<url><loc>https://scifaro.com/en/abs/an-automatic-analysis-of-ultrasound-vocalisations-for-the-prediction-of-interaction-context-in-captive-egyptian-fruit-bats-2406.06332</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automatic-analysis-of-ultrasound-vocalisations-for-the-prediction-of-interaction-context-in-captive-egyptian-fruit-bats-2406.06332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automatic-analysis-of-ultrasound-vocalisations-for-the-prediction-of-interaction-context-in-captive-egyptian-fruit-bats-2406.06332"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-step-count-estimation-for-running-windowing-and-neural-network-baselines-2406.06339</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-step-count-estimation-for-running-windowing-and-neural-network-baselines-2406.06339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-step-count-estimation-for-running-windowing-and-neural-network-baselines-2406.06339"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-heart-activity-from-speech-using-data-driven-and-knowledge-based-features-2406.06341</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-heart-activity-from-speech-using-data-driven-and-knowledge-based-features-2406.06341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-heart-activity-from-speech-using-data-driven-and-knowledge-based-features-2406.06341"/></url>
<url><loc>https://scifaro.com/en/abs/mosa-music-motion-with-semantic-annotation-dataset-for-cross-modal-music-processing-2406.06375</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mosa-music-motion-with-semantic-annotation-dataset-for-cross-modal-music-processing-2406.06375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mosa-music-motion-with-semantic-annotation-dataset-for-cross-modal-music-processing-2406.06375"/></url>
<url><loc>https://scifaro.com/en/abs/bts-bridging-text-and-sound-modalities-for-metadata-aided-respiratory-sound-classification-2406.06786</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bts-bridging-text-and-sound-modalities-for-metadata-aided-respiratory-sound-classification-2406.06786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bts-bridging-text-and-sound-modalities-for-metadata-aided-respiratory-sound-classification-2406.06786"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-up-masked-audio-encoder-learning-for-general-audio-classification-2406.06992</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-up-masked-audio-encoder-learning-for-general-audio-classification-2406.06992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-up-masked-audio-encoder-learning-for-general-audio-classification-2406.06992"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-language-gaps-in-audio-text-retrieval-2406.07012</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-language-gaps-in-audio-text-retrieval-2406.07012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-language-gaps-in-audio-text-retrieval-2406.07012"/></url>
<url><loc>https://scifaro.com/en/abs/icgan-an-implicit-conditioning-method-for-interpretable-feature-control-of-neural-audio-synthesis-2406.07131</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icgan-an-implicit-conditioning-method-for-interpretable-feature-control-of-neural-audio-synthesis-2406.07131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icgan-an-implicit-conditioning-method-for-interpretable-feature-control-of-neural-audio-synthesis-2406.07131"/></url>
<url><loc>https://scifaro.com/en/abs/emobox-multilingual-multi-corpus-speech-emotion-recognition-toolkit-and-benchmark-2406.07162</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emobox-multilingual-multi-corpus-speech-emotion-recognition-toolkit-and-benchmark-2406.07162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emobox-multilingual-multi-corpus-speech-emotion-recognition-toolkit-and-benchmark-2406.07162"/></url>
<url><loc>https://scifaro.com/en/abs/paraclap-towards-a-general-language-audio-model-for-computational-paralinguistic-tasks-2406.07203</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paraclap-towards-a-general-language-audio-model-for-computational-paralinguistic-tasks-2406.07203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paraclap-towards-a-general-language-audio-model-for-computational-paralinguistic-tasks-2406.07203"/></url>
<url><loc>https://scifaro.com/en/abs/src4vc-smartphone-recorded-corpus-for-voice-conversion-benchmark-2406.07254</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/src4vc-smartphone-recorded-corpus-for-voice-conversion-benchmark-2406.07254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/src4vc-smartphone-recorded-corpus-for-voice-conversion-benchmark-2406.07254"/></url>
<url><loc>https://scifaro.com/en/abs/as-70-a-mandarin-stuttered-speech-dataset-for-automatic-speech-recognition-and-stuttering-event-detection-2406.07256</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/as-70-a-mandarin-stuttered-speech-dataset-for-automatic-speech-recognition-and-stuttering-event-detection-2406.07256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/as-70-a-mandarin-stuttered-speech-dataset-for-automatic-speech-recognition-and-stuttering-event-detection-2406.07256"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-voice-conversion-by-conditional-denoising-training-using-latent-variables-of-recording-quality-and-environment-2406.07280</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-voice-conversion-by-conditional-denoising-training-using-latent-variables-of-recording-quality-and-environment-2406.07280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-voice-conversion-by-conditional-denoising-training-using-latent-variables-of-recording-quality-and-environment-2406.07280"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-investigation-on-speaker-augmentation-for-speaker-recognition-2406.07421</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-investigation-on-speaker-augmentation-for-speaker-recognition-2406.07421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-investigation-on-speaker-augmentation-for-speaker-recognition-2406.07421"/></url>
<url><loc>https://scifaro.com/en/abs/graph-based-multi-feature-fusion-method-for-speech-emotion-recognition-2406.07437</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-based-multi-feature-fusion-method-for-speech-emotion-recognition-2406.07437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-based-multi-feature-fusion-method-for-speech-emotion-recognition-2406.07437"/></url>
<url><loc>https://scifaro.com/en/abs/a-methodological-framework-and-exemplar-protocol-for-the-collection-and-analysis-of-repeated-speech-samples-2406.07497</loc><lastmod>2025-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-methodological-framework-and-exemplar-protocol-for-the-collection-and-analysis-of-repeated-speech-samples-2406.07497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-methodological-framework-and-exemplar-protocol-for-the-collection-and-analysis-of-repeated-speech-samples-2406.07497"/></url>
<url><loc>https://scifaro.com/en/abs/rad-net-2-a-causal-two-stage-repairing-and-denoising-speech-enhancement-network-with-knowledge-distillation-and-complex-axial-self-attention-2406.07498</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rad-net-2-a-causal-two-stage-repairing-and-denoising-speech-enhancement-network-with-knowledge-distillation-and-complex-axial-self-attention-2406.07498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rad-net-2-a-causal-two-stage-repairing-and-denoising-speech-enhancement-network-with-knowledge-distillation-and-complex-axial-self-attention-2406.07498"/></url>
<url><loc>https://scifaro.com/en/abs/hearing-anything-anywhere-2406.07532</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearing-anything-anywhere-2406.07532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearing-anything-anywhere-2406.07532"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-feature-guided-diffusion-model-for-speech-enhancement-2406.07646</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-feature-guided-diffusion-model-for-speech-enhancement-2406.07646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-feature-guided-diffusion-model-for-speech-enhancement-2406.07646"/></url>
<url><loc>https://scifaro.com/en/abs/fastast-accelerating-audio-spectrogram-transformer-via-token-merging-and-cross-model-knowledge-distillation-2406.07676</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastast-accelerating-audio-spectrogram-transformer-via-token-merging-and-cross-model-knowledge-distillation-2406.07676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastast-accelerating-audio-spectrogram-transformer-via-token-merging-and-cross-model-knowledge-distillation-2406.07676"/></url>
<url><loc>https://scifaro.com/en/abs/the-interspeech-2024-challenge-on-speech-processing-using-discrete-units-2406.07725</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interspeech-2024-challenge-on-speech-processing-using-discrete-units-2406.07725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interspeech-2024-challenge-on-speech-processing-using-discrete-units-2406.07725"/></url>
<url><loc>https://scifaro.com/en/abs/emosphere-tts-emotional-style-and-intensity-modeling-via-spherical-emotion-vector-for-controllable-emotional-text-to-speech-2406.07803</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emosphere-tts-emotional-style-and-intensity-modeling-via-spherical-emotion-vector-for-controllable-emotional-text-to-speech-2406.07803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emosphere-tts-emotional-style-and-intensity-modeling-via-spherical-emotion-vector-for-controllable-emotional-text-to-speech-2406.07803"/></url>
<url><loc>https://scifaro.com/en/abs/se-bn-adapter-parametric-efficient-domain-adaptation-for-speaker-recognition-2406.07832</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/se-bn-adapter-parametric-efficient-domain-adaptation-for-speaker-recognition-2406.07832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/se-bn-adapter-parametric-efficient-domain-adaptation-for-speaker-recognition-2406.07832"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-fake-video-detection-by-audio-visual-consistency-2406.07854</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-fake-video-detection-by-audio-visual-consistency-2406.07854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-fake-video-detection-by-audio-visual-consistency-2406.07854"/></url>
<url><loc>https://scifaro.com/en/abs/can-large-language-models-understand-spatial-audio-2406.07914</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-large-language-models-understand-spatial-audio-2406.07914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-large-language-models-understand-spatial-audio-2406.07914"/></url>
<url><loc>https://scifaro.com/en/abs/ctc-aligned-audio-text-embedding-for-streaming-open-vocabulary-keyword-spotting-2406.07923</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctc-aligned-audio-text-embedding-for-streaming-open-vocabulary-keyword-spotting-2406.07923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctc-aligned-audio-text-embedding-for-streaming-open-vocabulary-keyword-spotting-2406.07923"/></url>
<url><loc>https://scifaro.com/en/abs/fakesound-deepfake-general-audio-detection-2406.08052</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fakesound-deepfake-general-audio-detection-2406.08052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fakesound-deepfake-general-audio-detection-2406.08052"/></url>
<url><loc>https://scifaro.com/en/abs/codecfake-an-initial-dataset-for-detecting-llm-based-deepfake-audio-2406.08112</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codecfake-an-initial-dataset-for-detecting-llm-based-deepfake-audio-2406.08112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codecfake-an-initial-dataset-for-detecting-llm-based-deepfake-audio-2406.08112"/></url>
<url><loc>https://scifaro.com/en/abs/freev-free-lunch-for-vocoders-through-pseudo-inversed-mel-filter-2406.08196</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freev-free-lunch-for-vocoders-through-pseudo-inversed-mel-filter-2406.08196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freev-free-lunch-for-vocoders-through-pseudo-inversed-mel-filter-2406.08196"/></url>
<url><loc>https://scifaro.com/en/abs/asynchronous-voice-anonymization-using-adversarial-perturbation-on-speaker-embedding-2406.08200</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asynchronous-voice-anonymization-using-adversarial-perturbation-on-speaker-embedding-2406.08200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asynchronous-voice-anonymization-using-adversarial-perturbation-on-speaker-embedding-2406.08200"/></url>
<url><loc>https://scifaro.com/en/abs/colm-dsr-leveraging-neural-codec-language-modeling-for-multi-modal-dysarthric-speech-reconstruction-2406.08336</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/colm-dsr-leveraging-neural-codec-language-modeling-for-multi-modal-dysarthric-speech-reconstruction-2406.08336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/colm-dsr-leveraging-neural-codec-language-modeling-for-multi-modal-dysarthric-speech-reconstruction-2406.08336"/></url>
<url><loc>https://scifaro.com/en/abs/diff-a-riff-musical-accompaniment-co-creation-via-latent-diffusion-models-2406.08384</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-a-riff-musical-accompaniment-co-creation-via-latent-diffusion-models-2406.08384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-a-riff-musical-accompaniment-co-creation-via-latent-diffusion-models-2406.08384"/></url>
<url><loc>https://scifaro.com/en/abs/toksing-singing-voice-synthesis-based-on-discrete-tokens-2406.08416</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toksing-singing-voice-synthesis-based-on-discrete-tokens-2406.08416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toksing-singing-voice-synthesis-based-on-discrete-tokens-2406.08416"/></url>
<url><loc>https://scifaro.com/en/abs/towards-musically-informed-evaluation-of-piano-transcription-models-2406.08454</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-musically-informed-evaluation-of-piano-transcription-models-2406.08454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-musically-informed-evaluation-of-piano-transcription-models-2406.08454"/></url>
<url><loc>https://scifaro.com/en/abs/training-data-augmentation-for-dysarthric-automatic-speech-recognition-by-text-to-dysarthric-speech-synthesis-2406.08568</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-data-augmentation-for-dysarthric-automatic-speech-recognition-by-text-to-dysarthric-speech-synthesis-2406.08568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-data-augmentation-for-dysarthric-automatic-speech-recognition-by-text-to-dysarthric-speech-synthesis-2406.08568"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-manipulation-through-music-a-deep-learning-interactive-visual-approach-2406.08623</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-manipulation-through-music-a-deep-learning-interactive-visual-approach-2406.08623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-manipulation-through-music-a-deep-learning-interactive-visual-approach-2406.08623"/></url>
<url><loc>https://scifaro.com/en/abs/ml-superb-2-0-benchmarking-multilingual-speech-models-across-modeling-constraints-languages-and-datasets-2406.08641</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ml-superb-2-0-benchmarking-multilingual-speech-models-across-modeling-constraints-languages-and-datasets-2406.08641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ml-superb-2-0-benchmarking-multilingual-speech-models-across-modeling-constraints-languages-and-datasets-2406.08641"/></url>
<url><loc>https://scifaro.com/en/abs/tse-pi-target-sound-extraction-under-reverberant-environments-with-pitch-information-2406.08716</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tse-pi-target-sound-extraction-under-reverberant-environments-with-pitch-information-2406.08716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tse-pi-target-sound-extraction-under-reverberant-environments-with-pitch-information-2406.08716"/></url>
<url><loc>https://scifaro.com/en/abs/visinger2-end-to-end-singing-voice-synthesis-augmented-by-self-supervised-learning-representation-2406.08761</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visinger2-end-to-end-singing-voice-synthesis-augmented-by-self-supervised-learning-representation-2406.08761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visinger2-end-to-end-singing-voice-synthesis-augmented-by-self-supervised-learning-representation-2406.08761"/></url>
<url><loc>https://scifaro.com/en/abs/mff-einv2-multi-scale-feature-fusion-across-spectral-spatial-temporal-domains-for-sound-event-localization-and-detection-2406.08771</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mff-einv2-multi-scale-feature-fusion-across-spectral-spatial-temporal-domains-for-sound-event-localization-and-detection-2406.08771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mff-einv2-multi-scale-feature-fusion-across-spectral-spatial-temporal-domains-for-sound-event-localization-and-detection-2406.08771"/></url>
<url><loc>https://scifaro.com/en/abs/can-synthetic-audio-from-generative-foundation-models-assist-audio-recognition-and-speech-modeling-2406.08800</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-synthetic-audio-from-generative-foundation-models-assist-audio-recognition-and-speech-modeling-2406.08800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-synthetic-audio-from-generative-foundation-models-assist-audio-recognition-and-speech-modeling-2406.08800"/></url>
<url><loc>https://scifaro.com/en/abs/are-we-there-yet-a-brief-survey-of-music-emotion-prediction-datasets-models-and-outstanding-challenges-2406.08809</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-we-there-yet-a-brief-survey-of-music-emotion-prediction-datasets-models-and-outstanding-challenges-2406.08809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-we-there-yet-a-brief-survey-of-music-emotion-prediction-datasets-models-and-outstanding-challenges-2406.08809"/></url>
<url><loc>https://scifaro.com/en/abs/generating-speakers-by-prompting-listener-impressions-for-pre-trained-multi-speaker-text-to-speech-systems-2406.08812</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-speakers-by-prompting-listener-impressions-for-pre-trained-multi-speaker-text-to-speech-systems-2406.08812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-speakers-by-prompting-listener-impressions-for-pre-trained-multi-speaker-text-to-speech-systems-2406.08812"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-temporal-class-activation-representation-for-audio-spoofing-detection-2406.08825</loc><lastmod>2025-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-temporal-class-activation-representation-for-audio-spoofing-detection-2406.08825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-temporal-class-activation-representation-for-audio-spoofing-detection-2406.08825"/></url>
<url><loc>https://scifaro.com/en/abs/effectiveasr-a-single-step-non-autoregressive-mandarin-speech-recognition-architecture-with-high-accuracy-and-inference-speed-2406.08835</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effectiveasr-a-single-step-non-autoregressive-mandarin-speech-recognition-architecture-with-high-accuracy-and-inference-speed-2406.08835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effectiveasr-a-single-step-non-autoregressive-mandarin-speech-recognition-architecture-with-high-accuracy-and-inference-speed-2406.08835"/></url>
<url><loc>https://scifaro.com/en/abs/singomd-singing-oriented-multi-resolution-discrete-representation-construction-from-speech-models-2406.08905</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singomd-singing-oriented-multi-resolution-discrete-representation-construction-from-speech-models-2406.08905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singomd-singing-oriented-multi-resolution-discrete-representation-construction-from-speech-models-2406.08905"/></url>
<url><loc>https://scifaro.com/en/abs/transcription-free-fine-tuning-of-speech-separation-models-for-noisy-and-reverberant-multi-speaker-automatic-speech-recognition-2406.08914</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcription-free-fine-tuning-of-speech-separation-models-for-noisy-and-reverberant-multi-speaker-automatic-speech-recognition-2406.08914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcription-free-fine-tuning-of-speech-separation-models-for-noisy-and-reverberant-multi-speaker-automatic-speech-recognition-2406.08914"/></url>
<url><loc>https://scifaro.com/en/abs/av-gs-learning-material-and-geometry-aware-priors-for-novel-view-acoustic-synthesis-2406.08920</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-gs-learning-material-and-geometry-aware-priors-for-novel-view-acoustic-synthesis-2406.08920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-gs-learning-material-and-geometry-aware-priors-for-novel-view-acoustic-synthesis-2406.08920"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-gaussian-mixture-audio-denoise-2406.09154</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-gaussian-mixture-audio-denoise-2406.09154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-gaussian-mixture-audio-denoise-2406.09154"/></url>
<url><loc>https://scifaro.com/en/abs/complex-image-generative-diffusion-transformer-for-audio-denoising-2406.09161</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-image-generative-diffusion-transformer-for-audio-denoising-2406.09161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-image-generative-diffusion-transformer-for-audio-denoising-2406.09161"/></url>
<url><loc>https://scifaro.com/en/abs/vision-transformer-segmentation-for-visual-bird-sound-denoising-2406.09167</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vision-transformer-segmentation-for-visual-bird-sound-denoising-2406.09167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vision-transformer-segmentation-for-visual-bird-sound-denoising-2406.09167"/></url>
<url><loc>https://scifaro.com/en/abs/pianomotion10m-dataset-and-benchmark-for-hand-motion-generation-in-piano-performance-2406.09326</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pianomotion10m-dataset-and-benchmark-for-hand-motion-generation-in-piano-performance-2406.09326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pianomotion10m-dataset-and-benchmark-for-hand-motion-generation-in-piano-performance-2406.09326"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-phonetic-structure-of-mandarin-using-audacity-2406.09426</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-phonetic-structure-of-mandarin-using-audacity-2406.09426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-phonetic-structure-of-mandarin-using-audacity-2406.09426"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-mix-knowledge-distillation-for-fake-speech-detection-2406.09664</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-mix-knowledge-distillation-for-fake-speech-detection-2406.09664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-mix-knowledge-distillation-for-fake-speech-detection-2406.09664"/></url>
<url><loc>https://scifaro.com/en/abs/vec-tok-vc-residual-enhanced-robust-zero-shot-voice-conversion-with-progressive-constraints-in-a-dual-mode-training-strategy-2406.09844</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vec-tok-vc-residual-enhanced-robust-zero-shot-voice-conversion-with-progressive-constraints-in-a-dual-mode-training-strategy-2406.09844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vec-tok-vc-residual-enhanced-robust-zero-shot-voice-conversion-with-progressive-constraints-in-a-dual-mode-training-strategy-2406.09844"/></url>
<url><loc>https://scifaro.com/en/abs/mmm-multi-layer-multi-residual-multi-stream-discrete-speech-representation-from-self-supervised-learning-model-2406.09869</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmm-multi-layer-multi-residual-multi-stream-discrete-speech-representation-from-self-supervised-learning-model-2406.09869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmm-multi-layer-multi-residual-multi-stream-discrete-speech-representation-from-self-supervised-learning-model-2406.09869"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-speech-enhancement-without-a-separate-speaker-embedding-model-2406.09928</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-speech-enhancement-without-a-separate-speaker-embedding-model-2406.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-speech-enhancement-without-a-separate-speaker-embedding-model-2406.09928"/></url>
<url><loc>https://scifaro.com/en/abs/what-does-it-take-to-generalize-ser-model-across-datasets-a-comprehensive-benchmark-2406.09933</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-does-it-take-to-generalize-ser-model-across-datasets-a-comprehensive-benchmark-2406.09933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-does-it-take-to-generalize-ser-model-across-datasets-a-comprehensive-benchmark-2406.09933"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-text-augmentation-approach-for-contextualized-mandarin-speech-recognition-2406.09950</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-text-augmentation-approach-for-contextualized-mandarin-speech-recognition-2406.09950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-text-augmentation-approach-for-contextualized-mandarin-speech-recognition-2406.09950"/></url>
<url><loc>https://scifaro.com/en/abs/towards-effective-and-efficient-non-autoregressive-decoding-using-block-based-attention-mask-2406.10034</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-effective-and-efficient-non-autoregressive-decoding-using-block-based-attention-mask-2406.10034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-effective-and-efficient-non-autoregressive-decoding-using-block-based-attention-mask-2406.10034"/></url>
<url><loc>https://scifaro.com/en/abs/simul-whisper-attention-guided-streaming-whisper-with-truncation-detection-2406.10052</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simul-whisper-attention-guided-streaming-whisper-with-truncation-detection-2406.10052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simul-whisper-attention-guided-streaming-whisper-with-truncation-detection-2406.10052"/></url>
<url><loc>https://scifaro.com/en/abs/uniaudio-1-5-large-language-model-driven-audio-codec-is-a-few-shot-audio-task-learner-2406.10056</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniaudio-1-5-large-language-model-driven-audio-codec-is-a-few-shot-audio-task-learner-2406.10056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniaudio-1-5-large-language-model-driven-audio-codec-is-a-few-shot-audio-task-learner-2406.10056"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speaker-features-learning-for-audio-visual-multichannel-speech-separation-and-recognition-2406.10152</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speaker-features-learning-for-audio-visual-multichannel-speech-separation-and-recognition-2406.10152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speaker-features-learning-for-audio-visual-multichannel-speech-separation-and-recognition-2406.10152"/></url>
<url><loc>https://scifaro.com/en/abs/one-pass-multiple-conformer-and-foundation-speech-systems-compression-and-quantization-using-an-all-in-one-neural-model-2406.10160</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-pass-multiple-conformer-and-foundation-speech-systems-compression-and-quantization-using-an-all-in-one-neural-model-2406.10160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-pass-multiple-conformer-and-foundation-speech-systems-compression-and-quantization-using-an-all-in-one-neural-model-2406.10160"/></url>
<url><loc>https://scifaro.com/en/abs/how-should-we-extract-discrete-audio-tokens-from-self-supervised-models-2406.10735</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-should-we-extract-discrete-audio-tokens-from-self-supervised-models-2406.10735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-should-we-extract-discrete-audio-tokens-from-self-supervised-models-2406.10735"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-cnn-and-its-use-case-in-digital-healthcare-2406.10741</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-cnn-and-its-use-case-in-digital-healthcare-2406.10741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-cnn-and-its-use-case-in-digital-healthcare-2406.10741"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-automatic-speech-assessment-w-ranksim-regularization-and-hybrid-feature-fusion-strategies-2406.10873</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-automatic-speech-assessment-w-ranksim-regularization-and-hybrid-feature-fusion-strategies-2406.10873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-automatic-speech-assessment-w-ranksim-regularization-and-hybrid-feature-fusion-strategies-2406.10873"/></url>
<url><loc>https://scifaro.com/en/abs/singmos-an-extensive-open-source-singing-voice-dataset-for-mos-prediction-2406.10911</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singmos-an-extensive-open-source-singing-voice-dataset-for-mos-prediction-2406.10911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singmos-an-extensive-open-source-singing-voice-dataset-for-mos-prediction-2406.10911"/></url>
<url><loc>https://scifaro.com/en/abs/imperceptible-rhythm-backdoor-attacks-exploring-rhythm-transformation-for-embedding-undetectable-vulnerabilities-on-speech-recognition-2406.10932</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imperceptible-rhythm-backdoor-attacks-exploring-rhythm-transformation-for-embedding-undetectable-vulnerabilities-on-speech-recognition-2406.10932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imperceptible-rhythm-backdoor-attacks-exploring-rhythm-transformation-for-embedding-undetectable-vulnerabilities-on-speech-recognition-2406.10932"/></url>
<url><loc>https://scifaro.com/en/abs/robust-channel-learning-for-large-scale-radio-speaker-verification-2406.10956</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-channel-learning-for-large-scale-radio-speaker-verification-2406.10956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-channel-learning-for-large-scale-radio-speaker-verification-2406.10956"/></url>
<url><loc>https://scifaro.com/en/abs/joint-audio-and-symbolic-conditioning-for-temporally-controlled-text-to-music-generation-2406.10970</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-audio-and-symbolic-conditioning-for-temporally-controlled-text-to-music-generation-2406.10970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-audio-and-symbolic-conditioning-for-temporally-controlled-text-to-music-generation-2406.10970"/></url>
<url><loc>https://scifaro.com/en/abs/spear-receiver-to-receiver-acoustic-neural-warping-field-2406.11006</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spear-receiver-to-receiver-acoustic-neural-warping-field-2406.11006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spear-receiver-to-receiver-acoustic-neural-warping-field-2406.11006"/></url>
<url><loc>https://scifaro.com/en/abs/outlier-reduction-with-gated-attention-for-improved-post-training-quantization-in-large-sequence-to-sequence-speech-foundation-models-2406.11022</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/outlier-reduction-with-gated-attention-for-improved-post-training-quantization-in-large-sequence-to-sequence-speech-foundation-models-2406.11022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/outlier-reduction-with-gated-attention-for-improved-post-training-quantization-in-large-sequence-to-sequence-speech-foundation-models-2406.11022"/></url>
<url><loc>https://scifaro.com/en/abs/large-language-models-for-dysfluency-detection-in-stuttered-speech-2406.11025</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-language-models-for-dysfluency-detection-in-stuttered-speech-2406.11025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-language-models-for-dysfluency-detection-in-stuttered-speech-2406.11025"/></url>
<url><loc>https://scifaro.com/en/abs/nast-noise-aware-speech-tokenization-for-speech-language-models-2406.11037</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nast-noise-aware-speech-tokenization-for-speech-language-models-2406.11037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nast-noise-aware-speech-tokenization-for-speech-language-models-2406.11037"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-physical-properties-in-acoustic-tubes-using-physics-informed-neural-networks-2406.11119</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-physical-properties-in-acoustic-tubes-using-physics-informed-neural-networks-2406.11119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-physical-properties-in-acoustic-tubes-using-physics-informed-neural-networks-2406.11119"/></url>
<url><loc>https://scifaro.com/en/abs/smru-split-and-merge-recurrent-based-unet-for-acoustic-echo-cancellation-and-noise-suppression-2406.11175</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smru-split-and-merge-recurrent-based-unet-for-acoustic-echo-cancellation-and-noise-suppression-2406.11175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smru-split-and-merge-recurrent-based-unet-for-acoustic-echo-cancellation-and-noise-suppression-2406.11175"/></url>
<url><loc>https://scifaro.com/en/abs/anopatch-towards-better-consistency-in-machine-anomalous-sound-detection-2406.11364</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anopatch-towards-better-consistency-in-machine-anomalous-sound-detection-2406.11364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anopatch-towards-better-consistency-in-machine-anomalous-sound-detection-2406.11364"/></url>
<url><loc>https://scifaro.com/en/abs/gama-a-large-audio-language-model-with-advanced-audio-understanding-and-complex-reasoning-abilities-2406.11768</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gama-a-large-audio-language-model-with-advanced-audio-understanding-and-complex-reasoning-abilities-2406.11768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gama-a-large-audio-language-model-with-advanced-audio-understanding-and-complex-reasoning-abilities-2406.11768"/></url>
<url><loc>https://scifaro.com/en/abs/a-mel-spectrogram-enhancement-paradigm-based-on-cwt-in-speech-synthesis-2406.12164</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mel-spectrogram-enhancement-paradigm-based-on-cwt-in-speech-synthesis-2406.12164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mel-spectrogram-enhancement-paradigm-based-on-cwt-in-speech-synthesis-2406.12164"/></url>
<url><loc>https://scifaro.com/en/abs/interface-design-for-self-supervised-speech-models-2406.12209</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interface-design-for-self-supervised-speech-models-2406.12209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interface-design-for-self-supervised-speech-models-2406.12209"/></url>
<url><loc>https://scifaro.com/en/abs/jen-1-dreamstyler-customized-musical-concept-learning-via-pivotal-parameters-tuning-2406.12292</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jen-1-dreamstyler-customized-musical-concept-learning-via-pivotal-parameters-tuning-2406.12292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jen-1-dreamstyler-customized-musical-concept-learning-via-pivotal-parameters-tuning-2406.12292"/></url>
<url><loc>https://scifaro.com/en/abs/towards-audio-codec-based-speech-separation-2406.12434</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-audio-codec-based-speech-separation-2406.12434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-audio-codec-based-speech-separation-2406.12434"/></url>
<url><loc>https://scifaro.com/en/abs/rapid-language-adaptation-for-multilingual-e2e-speech-recognition-using-encoder-prompting-2406.12611</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rapid-language-adaptation-for-multilingual-e2e-speech-recognition-using-encoder-prompting-2406.12611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rapid-language-adaptation-for-multilingual-e2e-speech-recognition-using-encoder-prompting-2406.12611"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-gap-integrating-pre-trained-speech-enhancement-and-recognition-models-for-robust-speech-recognition-2406.12699</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-gap-integrating-pre-trained-speech-enhancement-and-recognition-models-for-robust-speech-recognition-2406.12699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-gap-integrating-pre-trained-speech-enhancement-and-recognition-models-for-robust-speech-recognition-2406.12699"/></url>
<url><loc>https://scifaro.com/en/abs/ed-skws-early-decision-spiking-neural-networks-for-rapid-and-energy-efficient-keyword-spotting-2406.12726</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ed-skws-early-decision-spiking-neural-networks-for-rapid-and-energy-efficient-keyword-spotting-2406.12726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ed-skws-early-decision-spiking-neural-networks-for-rapid-and-energy-efficient-keyword-spotting-2406.12726"/></url>
<url><loc>https://scifaro.com/en/abs/global-local-convolution-with-spiking-neural-networks-for-energy-efficient-keyword-spotting-2406.13179</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-local-convolution-with-spiking-neural-networks-for-energy-efficient-keyword-spotting-2406.13179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-local-convolution-with-spiking-neural-networks-for-energy-efficient-keyword-spotting-2406.13179"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-automated-audio-captioning-via-large-language-models-with-optimized-audio-encoding-2406.13275</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-automated-audio-captioning-via-large-language-models-with-optimized-audio-encoding-2406.13275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-automated-audio-captioning-via-large-language-models-with-optimized-audio-encoding-2406.13275"/></url>
<url><loc>https://scifaro.com/en/abs/straight-through-gumbel-softmax-estimator-based-bimodal-neural-architecture-search-for-audio-visual-deepfake-detection-2406.13384</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/straight-through-gumbel-softmax-estimator-based-bimodal-neural-architecture-search-for-audio-visual-deepfake-detection-2406.13384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/straight-through-gumbel-softmax-estimator-based-bimodal-neural-architecture-search-for-audio-visual-deepfake-detection-2406.13384"/></url>
<url><loc>https://scifaro.com/en/abs/automated-bioacoustic-monitoring-for-south-african-bird-species-on-unlabeled-data-2406.13579</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-bioacoustic-monitoring-for-south-african-bird-species-on-unlabeled-data-2406.13579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-bioacoustic-monitoring-for-south-african-bird-species-on-unlabeled-data-2406.13579"/></url>
<url><loc>https://scifaro.com/en/abs/improved-remixing-process-for-domain-adaptation-based-speech-enhancement-by-mitigating-data-imbalance-in-signal-to-noise-ratio-2406.13982</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-remixing-process-for-domain-adaptation-based-speech-enhancement-by-mitigating-data-imbalance-in-signal-to-noise-ratio-2406.13982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-remixing-process-for-domain-adaptation-based-speech-enhancement-by-mitigating-data-imbalance-in-signal-to-noise-ratio-2406.13982"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-stream-fusion-approach-with-one-class-learning-for-audio-visual-deepfake-detection-2406.14176</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-stream-fusion-approach-with-one-class-learning-for-audio-visual-deepfake-detection-2406.14176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-stream-fusion-approach-with-one-class-learning-for-audio-visual-deepfake-detection-2406.14176"/></url>
<url><loc>https://scifaro.com/en/abs/dasb-discrete-audio-and-speech-benchmark-2406.14294</loc><lastmod>2026-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dasb-discrete-audio-and-speech-benchmark-2406.14294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dasb-discrete-audio-and-speech-benchmark-2406.14294"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-common-online-speaker-diarization-methods-2406.14464</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-common-online-speaker-diarization-methods-2406.14464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-common-online-speaker-diarization-methods-2406.14464"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-representation-learning-for-environment-agnostic-speaker-recognition-2406.14559</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-representation-learning-for-environment-agnostic-speaker-recognition-2406.14559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-representation-learning-for-environment-agnostic-speaker-recognition-2406.14559"/></url>
<url><loc>https://scifaro.com/en/abs/globe-a-high-quality-english-corpus-with-global-accents-for-zero-shot-speaker-adaptive-text-to-speech-2406.14875</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/globe-a-high-quality-english-corpus-with-global-accents-for-zero-shot-speaker-adaptive-text-to-speech-2406.14875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/globe-a-high-quality-english-corpus-with-global-accents-for-zero-shot-speaker-adaptive-text-to-speech-2406.14875"/></url>
<url><loc>https://scifaro.com/en/abs/breaking-resource-barriers-in-speech-emotion-recognition-via-data-distillation-2406.15119</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breaking-resource-barriers-in-speech-emotion-recognition-via-data-distillation-2406.15119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breaking-resource-barriers-in-speech-emotion-recognition-via-data-distillation-2406.15119"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-techniques-in-automatic-music-transcription-a-systematic-survey-2406.15249</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-techniques-in-automatic-music-transcription-a-systematic-survey-2406.15249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-techniques-in-automatic-music-transcription-a-systematic-survey-2406.15249"/></url>
<url><loc>https://scifaro.com/en/abs/generating-music-with-structure-using-self-similarity-as-attention-2406.15647</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-music-with-structure-using-self-similarity-as-attention-2406.15647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-music-with-structure-using-self-similarity-as-attention-2406.15647"/></url>
<url><loc>https://scifaro.com/en/abs/improving-unsupervised-clean-to-rendered-guitar-tone-transformation-using-gans-and-integrated-unaligned-clean-data-2406.15751</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-unsupervised-clean-to-rendered-guitar-tone-transformation-using-gans-and-integrated-unaligned-clean-data-2406.15751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-unsupervised-clean-to-rendered-guitar-tone-transformation-using-gans-and-integrated-unaligned-clean-data-2406.15751"/></url>
<url><loc>https://scifaro.com/en/abs/ai-based-drone-assisted-human-rescue-in-disaster-environments-challenges-and-opportunities-2406.15875</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-based-drone-assisted-human-rescue-in-disaster-environments-challenges-and-opportunities-2406.15875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-based-drone-assisted-human-rescue-in-disaster-environments-challenges-and-opportunities-2406.15875"/></url>
<url><loc>https://scifaro.com/en/abs/the-music-maestro-or-the-musically-challenged-a-massive-music-evaluation-benchmark-for-large-language-models-2406.15885</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-music-maestro-or-the-musically-challenged-a-massive-music-evaluation-benchmark-for-large-language-models-2406.15885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-music-maestro-or-the-musically-challenged-a-massive-music-evaluation-benchmark-for-large-language-models-2406.15885"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-individual-depression-symptoms-from-acoustic-features-during-speech-2406.16000</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-individual-depression-symptoms-from-acoustic-features-during-speech-2406.16000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-individual-depression-symptoms-from-acoustic-features-during-speech-2406.16000"/></url>
<url><loc>https://scifaro.com/en/abs/audiobench-a-universal-benchmark-for-audio-large-language-models-2406.16020</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiobench-a-universal-benchmark-for-audio-large-language-models-2406.16020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiobench-a-universal-benchmark-for-audio-large-language-models-2406.16020"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representation-analysis-based-on-inter-and-intra-model-similarities-2406.16099</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representation-analysis-based-on-inter-and-intra-model-similarities-2406.16099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representation-analysis-based-on-inter-and-intra-model-similarities-2406.16099"/></url>
<url><loc>https://scifaro.com/en/abs/towards-open-respiratory-acoustic-foundation-models-pretraining-and-benchmarking-2406.16148</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-open-respiratory-acoustic-foundation-models-pretraining-and-benchmarking-2406.16148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-open-respiratory-acoustic-foundation-models-pretraining-and-benchmarking-2406.16148"/></url>
<url><loc>https://scifaro.com/en/abs/listen-and-move-improving-gans-coherency-in-agnostic-sound-to-video-generation-2406.16155</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-and-move-improving-gans-coherency-in-agnostic-sound-to-video-generation-2406.16155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-and-move-improving-gans-coherency-in-agnostic-sound-to-video-generation-2406.16155"/></url>
<url><loc>https://scifaro.com/en/abs/snr-progressive-model-with-harmonic-compensation-for-low-snr-speech-enhancement-2406.16317</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snr-progressive-model-with-harmonic-compensation-for-low-snr-speech-enhancement-2406.16317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snr-progressive-model-with-harmonic-compensation-for-low-snr-speech-enhancement-2406.16317"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-capability-of-mamba-in-speech-applications-2406.16808</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-capability-of-mamba-in-speech-applications-2406.16808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-capability-of-mamba-in-speech-applications-2406.16808"/></url>
<url><loc>https://scifaro.com/en/abs/and-audio-network-dissection-for-interpreting-deep-acoustic-models-2406.16990</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/and-audio-network-dissection-for-interpreting-deep-acoustic-models-2406.16990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/and-audio-network-dissection-for-interpreting-deep-acoustic-models-2406.16990"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-likelihood-estimation-of-the-direction-of-sound-in-a-reverberant-noisy-environment-2406.17103</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-likelihood-estimation-of-the-direction-of-sound-in-a-reverberant-noisy-environment-2406.17103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-likelihood-estimation-of-the-direction-of-sound-in-a-reverberant-noisy-environment-2406.17103"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-synthesis-with-acoustic-waves-2406.17111</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-synthesis-with-acoustic-waves-2406.17111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-synthesis-with-acoustic-waves-2406.17111"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-confidence-estimation-measures-for-speaker-diarization-2406.17124</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-confidence-estimation-measures-for-speaker-diarization-2406.17124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-confidence-estimation-measures-for-speaker-diarization-2406.17124"/></url>
<url><loc>https://scifaro.com/en/abs/sound-tagging-in-infant-centric-home-soundscapes-2406.17190</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-tagging-in-infant-centric-home-soundscapes-2406.17190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-tagging-in-infant-centric-home-soundscapes-2406.17190"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-embeddings-for-detecting-individual-symptoms-of-depression-2406.17229</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-embeddings-for-detecting-individual-symptoms-of-depression-2406.17229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-embeddings-for-detecting-individual-symptoms-of-depression-2406.17229"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-silence-bias-analysis-through-loss-and-asymmetric-approach-in-audio-anti-spoofing-2406.17246</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-silence-bias-analysis-through-loss-and-asymmetric-approach-in-audio-anti-spoofing-2406.17246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-silence-bias-analysis-through-loss-and-asymmetric-approach-in-audio-anti-spoofing-2406.17246"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-channel-modeling-in-multi-head-self-attention-for-synthetic-speech-detection-2406.17376</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-channel-modeling-in-multi-head-self-attention-for-synthetic-speech-detection-2406.17376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-channel-modeling-in-multi-head-self-attention-for-synthetic-speech-detection-2406.17376"/></url>
<url><loc>https://scifaro.com/en/abs/this-paper-had-the-smartest-reviewers-flattery-detection-utilising-an-audio-textual-transformer-based-approach-2406.17667</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/this-paper-had-the-smartest-reviewers-flattery-detection-utilising-an-audio-textual-transformer-based-approach-2406.17667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/this-paper-had-the-smartest-reviewers-flattery-detection-utilising-an-audio-textual-transformer-based-approach-2406.17667"/></url>
<url><loc>https://scifaro.com/en/abs/specmaskgit-masked-generative-modeling-of-audio-spectrograms-for-efficient-audio-synthesis-and-beyond-2406.17672</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specmaskgit-masked-generative-modeling-of-audio-spectrograms-for-efficient-audio-synthesis-and-beyond-2406.17672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specmaskgit-masked-generative-modeling-of-audio-spectrograms-for-efficient-audio-synthesis-and-beyond-2406.17672"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-voice-conversion-voice-conversion-preserving-spatial-information-and-non-target-signals-2406.17722</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-voice-conversion-voice-conversion-preserving-spatial-information-and-non-target-signals-2406.17722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-voice-conversion-voice-conversion-preserving-spatial-information-and-non-target-signals-2406.17722"/></url>
<url><loc>https://scifaro.com/en/abs/emvd-dataset-a-dataset-of-extreme-vocal-distortion-techniques-used-in-heavy-metal-2406.17732</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emvd-dataset-a-dataset-of-extreme-vocal-distortion-techniques-used-in-heavy-metal-2406.17732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emvd-dataset-a-dataset-of-extreme-vocal-distortion-techniques-used-in-heavy-metal-2406.17732"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-speaker-multi-lingual-voice-cloning-system-based-on-vits2-for-limmits-2024-challenge-2406.17801</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-speaker-multi-lingual-voice-cloning-system-based-on-vits2-for-limmits-2024-challenge-2406.17801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-speaker-multi-lingual-voice-cloning-system-based-on-vits2-for-limmits-2024-challenge-2406.17801"/></url>
<url><loc>https://scifaro.com/en/abs/improving-robustness-of-llm-based-speech-synthesis-by-learning-monotonic-alignment-2406.17957</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-robustness-of-llm-based-speech-synthesis-by-learning-monotonic-alignment-2406.17957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-robustness-of-llm-based-speech-synthesis-by-learning-monotonic-alignment-2406.17957"/></url>
<url><loc>https://scifaro.com/en/abs/sc-moe-switch-conformer-mixture-of-experts-for-unified-streaming-and-non-streaming-code-switching-asr-2406.18021</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sc-moe-switch-conformer-mixture-of-experts-for-unified-streaming-and-non-streaming-code-switching-asr-2406.18021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sc-moe-switch-conformer-mixture-of-experts-for-unified-streaming-and-non-streaming-code-switching-asr-2406.18021"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-synthesizing-expressive-violin-performances-approaches-and-comparisons-2406.18089</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-synthesizing-expressive-violin-performances-approaches-and-comparisons-2406.18089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-synthesizing-expressive-violin-performances-approaches-and-comparisons-2406.18089"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-airport-tower-command-recognition-integrating-squeeze-and-excitation-and-broadcasted-residual-learning-2406.18313</loc><lastmod>2024-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-airport-tower-command-recognition-integrating-squeeze-and-excitation-and-broadcasted-residual-learning-2406.18313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-airport-tower-command-recognition-integrating-squeeze-and-excitation-and-broadcasted-residual-learning-2406.18313"/></url>
<url><loc>https://scifaro.com/en/abs/towards-deep-active-learning-in-avian-bioacoustics-2406.18621</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-deep-active-learning-in-avian-bioacoustics-2406.18621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-deep-active-learning-in-avian-bioacoustics-2406.18621"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-prediction-of-amyotrophic-lateral-sclerosis-progression-using-longitudinal-speech-transformer-2406.18625</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-prediction-of-amyotrophic-lateral-sclerosis-progression-using-longitudinal-speech-transformer-2406.18625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-prediction-of-amyotrophic-lateral-sclerosis-progression-using-longitudinal-speech-transformer-2406.18625"/></url>
<url><loc>https://scifaro.com/en/abs/a-stem-agnostic-single-decoder-system-for-music-source-separation-beyond-four-stems-2406.18747</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-stem-agnostic-single-decoder-system-for-music-source-separation-beyond-four-stems-2406.18747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-stem-agnostic-single-decoder-system-for-music-source-separation-beyond-four-stems-2406.18747"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-decoder-only-automatic-speech-recognition-with-discrete-speech-units-a-pilot-study-2406.18862</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-decoder-only-automatic-speech-recognition-with-discrete-speech-units-a-pilot-study-2406.18862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-decoder-only-automatic-speech-recognition-with-discrete-speech-units-a-pilot-study-2406.18862"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-asr-robustness-to-packet-loss-with-a-front-end-adaptation-network-2406.18928</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-asr-robustness-to-packet-loss-with-a-front-end-adaptation-network-2406.18928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-asr-robustness-to-packet-loss-with-a-front-end-adaptation-network-2406.18928"/></url>
<url><loc>https://scifaro.com/en/abs/application-of-asv-for-voice-identification-after-vc-and-duration-predictor-improvement-in-tts-models-2406.19243</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/application-of-asv-for-voice-identification-after-vc-and-duration-predictor-improvement-in-tts-models-2406.19243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/application-of-asv-for-voice-identification-after-vc-and-duration-predictor-improvement-in-tts-models-2406.19243"/></url>
<url><loc>https://scifaro.com/en/abs/subtractive-training-for-music-stem-insertion-using-latent-diffusion-models-2406.19328</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subtractive-training-for-music-stem-insertion-using-latent-diffusion-models-2406.19328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subtractive-training-for-music-stem-insertion-using-latent-diffusion-models-2406.19328"/></url>
<url><loc>https://scifaro.com/en/abs/taming-data-and-transformers-for-audio-generation-2406.19388</loc><lastmod>2025-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taming-data-and-transformers-for-audio-generation-2406.19388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taming-data-and-transformers-for-audio-generation-2406.19388"/></url>
<url><loc>https://scifaro.com/en/abs/network-bending-of-diffusion-models-for-audio-visual-generation-2406.19589</loc><lastmod>2024-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/network-bending-of-diffusion-models-for-audio-visual-generation-2406.19589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/network-bending-of-diffusion-models-for-audio-visual-generation-2406.19589"/></url>
<url><loc>https://scifaro.com/en/abs/saml-speaker-adaptive-mixture-of-lora-experts-for-end-to-end-asr-2406.19706</loc><lastmod>2024-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saml-speaker-adaptive-mixture-of-lora-experts-for-end-to-end-asr-2406.19706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saml-speaker-adaptive-mixture-of-lora-experts-for-end-to-end-asr-2406.19706"/></url>
<url><loc>https://scifaro.com/en/abs/realman-a-real-recorded-and-annotated-microphone-array-dataset-for-dynamic-speech-enhancement-and-localization-2406.19959</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realman-a-real-recorded-and-annotated-microphone-array-dataset-for-dynamic-speech-enhancement-and-localization-2406.19959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realman-a-real-recorded-and-annotated-microphone-array-dataset-for-dynamic-speech-enhancement-and-localization-2406.19959"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-labeled-human-voice-signal-dataset-for-misbehavior-detection-2407.00188</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-labeled-human-voice-signal-dataset-for-misbehavior-detection-2407.00188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-labeled-human-voice-signal-dataset-for-misbehavior-detection-2407.00188"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-continual-learning-scenarios-and-strategies-for-audio-analysis-2407.00465</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-continual-learning-scenarios-and-strategies-for-audio-analysis-2407.00465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-continual-learning-scenarios-and-strategies-for-audio-analysis-2407.00465"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-pretrained-speech-models-for-automatic-speech-assessment-of-voice-disorders-2407.00531</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-pretrained-speech-models-for-automatic-speech-assessment-of-voice-disorders-2407.00531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-pretrained-speech-models-for-automatic-speech-assessment-of-voice-disorders-2407.00531"/></url>
<url><loc>https://scifaro.com/en/abs/improving-real-time-music-accompaniment-separation-with-mmdensenet-2407.00657</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-real-time-music-accompaniment-separation-with-mmdensenet-2407.00657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-real-time-music-accompaniment-separation-with-mmdensenet-2407.00657"/></url>
<url><loc>https://scifaro.com/en/abs/an-attribute-interpolation-method-in-speech-synthesis-by-model-merging-2407.00766</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attribute-interpolation-method-in-speech-synthesis-by-model-merging-2407.00766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attribute-interpolation-method-in-speech-synthesis-by-model-merging-2407.00766"/></url>
<url><loc>https://scifaro.com/en/abs/papez-resource-efficient-speech-separation-with-auditory-working-memory-2407.00888</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/papez-resource-efficient-speech-separation-with-auditory-working-memory-2407.00888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/papez-resource-efficient-speech-separation-with-auditory-working-memory-2407.00888"/></url>
<url><loc>https://scifaro.com/en/abs/are-you-sure-analysing-uncertainty-quantification-approaches-for-real-world-speech-emotion-recognition-2407.01143</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-you-sure-analysing-uncertainty-quantification-approaches-for-real-world-speech-emotion-recognition-2407.01143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-you-sure-analysing-uncertainty-quantification-approaches-for-real-world-speech-emotion-recognition-2407.01143"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-zero-shot-text-to-speech-with-mixture-of-adapters-2407.01291</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-zero-shot-text-to-speech-with-mixture-of-adapters-2407.01291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-zero-shot-text-to-speech-with-mixture-of-adapters-2407.01291"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-speaker-embeddings-in-end-to-end-neural-diarization-for-two-speaker-scenarios-2407.01317</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-speaker-embeddings-in-end-to-end-neural-diarization-for-two-speaker-scenarios-2407.01317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-speaker-embeddings-in-end-to-end-neural-diarization-for-two-speaker-scenarios-2407.01317"/></url>
<url><loc>https://scifaro.com/en/abs/on-feature-learning-for-titi-monkey-activity-detection-2407.01452</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-feature-learning-for-titi-monkey-activity-detection-2407.01452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-feature-learning-for-titi-monkey-activity-detection-2407.01452"/></url>
<url><loc>https://scifaro.com/en/abs/pictures-of-midi-controlled-music-generation-via-graphical-prompts-for-image-based-diffusion-inpainting-2407.01499</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pictures-of-midi-controlled-music-generation-via-graphical-prompts-for-image-based-diffusion-inpainting-2407.01499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pictures-of-midi-controlled-music-generation-via-graphical-prompts-for-image-based-diffusion-inpainting-2407.01499"/></url>
<url><loc>https://scifaro.com/en/abs/deepfake-audio-detection-using-spectrogram-based-feature-and-ensemble-of-deep-learning-models-2407.01777</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfake-audio-detection-using-spectrogram-based-feature-and-ensemble-of-deep-learning-models-2407.01777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfake-audio-detection-using-spectrogram-based-feature-and-ensemble-of-deep-learning-models-2407.01777"/></url>
<url><loc>https://scifaro.com/en/abs/constant-directivity-loudspeaker-beamforming-2407.01860</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/constant-directivity-loudspeaker-beamforming-2407.01860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/constant-directivity-loudspeaker-beamforming-2407.01860"/></url>
<url><loc>https://scifaro.com/en/abs/towards-training-music-taggers-on-synthetic-data-2407.02156</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-training-music-taggers-on-synthetic-data-2407.02156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-training-music-taggers-on-synthetic-data-2407.02156"/></url>
<url><loc>https://scifaro.com/en/abs/gmm-resnet2-ensemble-of-group-resnet-networks-for-synthetic-speech-detection-2407.02170</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gmm-resnet2-ensemble-of-group-resnet-networks-for-synthetic-speech-detection-2407.02170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gmm-resnet2-ensemble-of-group-resnet-networks-for-synthetic-speech-detection-2407.02170"/></url>
<url><loc>https://scifaro.com/en/abs/melodyt5-a-unified-score-to-score-transformer-for-symbolic-music-processing-2407.02277</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melodyt5-a-unified-score-to-score-transformer-for-symbolic-music-processing-2407.02277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melodyt5-a-unified-score-to-score-transformer-for-symbolic-music-processing-2407.02277"/></url>
<url><loc>https://scifaro.com/en/abs/the-solution-for-temporal-sound-localisation-task-of-iccv-1st-perception-test-challenge-2023-2407.02318</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-solution-for-temporal-sound-localisation-task-of-iccv-1st-perception-test-challenge-2023-2407.02318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-solution-for-temporal-sound-localisation-task-of-iccv-1st-perception-test-challenge-2023-2407.02318"/></url>
<url><loc>https://scifaro.com/en/abs/audiotime-a-temporally-aligned-audio-text-benchmark-dataset-2407.02857</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiotime-a-temporally-aligned-audio-text-benchmark-dataset-2407.02857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiotime-a-temporally-aligned-audio-text-benchmark-dataset-2407.02857"/></url>
<url><loc>https://scifaro.com/en/abs/picoaudio-enabling-precise-timestamp-and-frequency-controllability-of-audio-events-in-text-to-audio-generation-2407.02869</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/picoaudio-enabling-precise-timestamp-and-frequency-controllability-of-audio-events-in-text-to-audio-generation-2407.02869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/picoaudio-enabling-precise-timestamp-and-frequency-controllability-of-audio-events-in-text-to-audio-generation-2407.02869"/></url>
<url><loc>https://scifaro.com/en/abs/qifusion-net-layer-adapted-stream-non-stream-model-for-end-to-end-multi-accent-speech-recognition-2407.03026</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qifusion-net-layer-adapted-stream-non-stream-model-for-end-to-end-multi-accent-speech-recognition-2407.03026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qifusion-net-layer-adapted-stream-non-stream-model-for-end-to-end-multi-accent-speech-recognition-2407.03026"/></url>
<url><loc>https://scifaro.com/en/abs/a-toolchain-for-comprehensive-audio-video-analysis-using-deep-learning-based-multimodal-approach-a-use-case-of-riot-or-violent-context-detection-2407.03110</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-toolchain-for-comprehensive-audio-video-analysis-using-deep-learning-based-multimodal-approach-a-use-case-of-riot-or-violent-context-detection-2407.03110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-toolchain-for-comprehensive-audio-video-analysis-using-deep-learning-based-multimodal-approach-a-use-case-of-riot-or-violent-context-detection-2407.03110"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-text-independent-estimation-of-articulatory-movements-and-phoneme-alignments-from-speech-2407.03132</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-text-independent-estimation-of-articulatory-movements-and-phoneme-alignments-from-speech-2407.03132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-text-independent-estimation-of-articulatory-movements-and-phoneme-alignments-from-speech-2407.03132"/></url>
<url><loc>https://scifaro.com/en/abs/gmm-resnext-combining-generative-and-discriminative-models-for-speaker-verification-2407.03135</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gmm-resnext-combining-generative-and-discriminative-models-for-speaker-verification-2407.03135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gmm-resnext-combining-generative-and-discriminative-models-for-speaker-verification-2407.03135"/></url>
<url><loc>https://scifaro.com/en/abs/mudit-musit-alignment-with-colloquial-expression-in-description-to-song-generation-2407.03188</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mudit-musit-alignment-with-colloquial-expression-in-description-to-song-generation-2407.03188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mudit-musit-alignment-with-colloquial-expression-in-description-to-song-generation-2407.03188"/></url>
<url><loc>https://scifaro.com/en/abs/pianobart-symbolic-piano-music-generation-and-understanding-with-large-scale-pre-training-2407.03361</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pianobart-symbolic-piano-music-generation-and-understanding-with-large-scale-pre-training-2407.03361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pianobart-symbolic-piano-music-generation-and-understanding-with-large-scale-pre-training-2407.03361"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-framework-for-animal-sound-classification-with-features-optimization-2407.03440</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-framework-for-animal-sound-classification-with-features-optimization-2407.03440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-framework-for-animal-sound-classification-with-features-optimization-2407.03440"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-driven-privacy-preserving-dementia-detection-2407.03470</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-driven-privacy-preserving-dementia-detection-2407.03470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-driven-privacy-preserving-dementia-detection-2407.03470"/></url>
<url><loc>https://scifaro.com/en/abs/towards-attention-based-contrastive-learning-for-audio-spoof-detection-2407.03514</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-attention-based-contrastive-learning-for-audio-spoof-detection-2407.03514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-attention-based-contrastive-learning-for-audio-spoof-detection-2407.03514"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-grouping-network-for-audio-source-separation-2407.03736</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-grouping-network-for-audio-source-separation-2407.03736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-grouping-network-for-audio-source-separation-2407.03736"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-spectral-kurtosis-and-double-deep-priors-2407.03887</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-spectral-kurtosis-and-double-deep-priors-2407.03887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-spectral-kurtosis-and-double-deep-priors-2407.03887"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effectiveness-of-acoustic-bpe-in-decoder-only-tts-2407.03892</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effectiveness-of-acoustic-bpe-in-decoder-only-tts-2407.03892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effectiveness-of-acoustic-bpe-in-decoder-only-tts-2407.03892"/></url>
<url><loc>https://scifaro.com/en/abs/serialized-output-training-by-learned-dominance-2407.03966</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serialized-output-training-by-learned-dominance-2407.03966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serialized-output-training-by-learned-dominance-2407.03966"/></url>
<url><loc>https://scifaro.com/en/abs/funaudiollm-voice-understanding-and-generation-foundation-models-for-natural-interaction-between-humans-and-llms-2407.04051</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/funaudiollm-voice-understanding-and-generation-foundation-models-for-natural-interaction-between-humans-and-llms-2407.04051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/funaudiollm-voice-understanding-and-generation-foundation-models-for-natural-interaction-between-humans-and-llms-2407.04051"/></url>
<url><loc>https://scifaro.com/en/abs/musebarcontrol-enhancing-fine-grained-control-in-symbolic-music-generation-through-pre-training-and-counterfactual-loss-2407.04331</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musebarcontrol-enhancing-fine-grained-control-in-symbolic-music-generation-through-pre-training-and-counterfactual-loss-2407.04331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musebarcontrol-enhancing-fine-grained-control-in-symbolic-music-generation-through-pre-training-and-counterfactual-loss-2407.04331"/></url>
<url><loc>https://scifaro.com/en/abs/paguri-a-user-experience-study-of-creative-interaction-with-text-to-music-models-2407.04333</loc><lastmod>2025-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paguri-a-user-experience-study-of-creative-interaction-with-text-to-music-models-2407.04333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paguri-a-user-experience-study-of-creative-interaction-with-text-to-music-models-2407.04333"/></url>
<url><loc>https://scifaro.com/en/abs/a-mapping-strategy-for-interacting-with-latent-audio-synthesis-using-artistic-materials-2407.04379</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mapping-strategy-for-interacting-with-latent-audio-synthesis-using-artistic-materials-2407.04379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mapping-strategy-for-interacting-with-latent-audio-synthesis-using-artistic-materials-2407.04379"/></url>
<url><loc>https://scifaro.com/en/abs/sound-vecaps-improving-audio-generation-with-visual-enhanced-captions-2407.04416</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-vecaps-improving-audio-generation-with-visual-enhanced-captions-2407.04416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-vecaps-improving-audio-generation-with-visual-enhanced-captions-2407.04416"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-whisper-universal-acoustic-adversarial-attacks-to-control-speech-foundation-models-2407.04482</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-whisper-universal-acoustic-adversarial-attacks-to-control-speech-foundation-models-2407.04482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-whisper-universal-acoustic-adversarial-attacks-to-control-speech-foundation-models-2407.04482"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-timbre-remapping-with-differentiable-dsp-2407.04547</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-timbre-remapping-with-differentiable-dsp-2407.04547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-timbre-remapping-with-differentiable-dsp-2407.04547"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-speech-quality-prediction-through-quantization-aware-training-and-binary-activation-maps-2407.04578</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-speech-quality-prediction-through-quantization-aware-training-and-binary-activation-maps-2407.04578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-speech-quality-prediction-through-quantization-aware-training-and-binary-activation-maps-2407.04578"/></url>
<url><loc>https://scifaro.com/en/abs/all-neural-low-latency-directional-speech-extraction-2407.04879</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-neural-low-latency-directional-speech-extraction-2407.04879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-neural-low-latency-directional-speech-extraction-2407.04879"/></url>
<url><loc>https://scifaro.com/en/abs/a-reference-free-metric-for-language-queried-audio-source-separation-using-contrastive-language-audio-pretraining-2407.04936</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-reference-free-metric-for-language-queried-audio-source-separation-using-contrastive-language-audio-pretraining-2407.04936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-reference-free-metric-for-language-queried-audio-source-separation-using-contrastive-language-audio-pretraining-2407.04936"/></url>
<url><loc>https://scifaro.com/en/abs/a-layer-anchoring-strategy-for-enhancing-cross-lingual-speech-emotion-recognition-2407.04966</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-layer-anchoring-strategy-for-enhancing-cross-lingual-speech-emotion-recognition-2407.04966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-layer-anchoring-strategy-for-enhancing-cross-lingual-speech-emotion-recognition-2407.04966"/></url>
<url><loc>https://scifaro.com/en/abs/music-era-recognition-using-supervised-contrastive-learning-and-artist-information-2407.05368</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-era-recognition-using-supervised-contrastive-learning-and-artist-information-2407.05368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-era-recognition-using-supervised-contrastive-learning-and-artist-information-2407.05368"/></url>
<url><loc>https://scifaro.com/en/abs/research-on-the-acoustic-emission-source-localization-methodology-in-composite-materials-based-on-artificial-intelligence-2407.05405</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-on-the-acoustic-emission-source-localization-methodology-in-composite-materials-based-on-artificial-intelligence-2407.05405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-on-the-acoustic-emission-source-localization-methodology-in-composite-materials-based-on-artificial-intelligence-2407.05405"/></url>
<url><loc>https://scifaro.com/en/abs/cosyvoice-a-scalable-multilingual-zero-shot-text-to-speech-synthesizer-based-on-supervised-semantic-tokens-2407.05407</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosyvoice-a-scalable-multilingual-zero-shot-text-to-speech-synthesizer-based-on-supervised-semantic-tokens-2407.05407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosyvoice-a-scalable-multilingual-zero-shot-text-to-speech-synthesizer-based-on-supervised-semantic-tokens-2407.05407"/></url>
<url><loc>https://scifaro.com/en/abs/two-path-gmm-resnet-and-gmm-senet-for-asv-spoofing-detection-2407.05605</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-path-gmm-resnet-and-gmm-senet-for-asv-spoofing-detection-2407.05605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-path-gmm-resnet-and-gmm-senet-for-asv-spoofing-detection-2407.05605"/></url>
<url><loc>https://scifaro.com/en/abs/a-benchmark-for-multi-speaker-anonymization-2407.05608</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-benchmark-for-multi-speaker-anonymization-2407.05608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-benchmark-for-multi-speaker-anonymization-2407.05608"/></url>
<url><loc>https://scifaro.com/en/abs/sequential-contrastive-audio-visual-learning-2407.05782</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequential-contrastive-audio-visual-learning-2407.05782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequential-contrastive-audio-visual-learning-2407.05782"/></url>
<url><loc>https://scifaro.com/en/abs/cervical-auscultation-machine-learning-for-dysphagia-assessment-2407.05870</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cervical-auscultation-machine-learning-for-dysphagia-assessment-2407.05870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cervical-auscultation-machine-learning-for-dysphagia-assessment-2407.05870"/></url>
<url><loc>https://scifaro.com/en/abs/merge-a-bimodal-audio-lyrics-dataset-for-static-music-emotion-recognition-2407.06060</loc><lastmod>2025-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/merge-a-bimodal-audio-lyrics-dataset-for-static-music-emotion-recognition-2407.06060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/merge-a-bimodal-audio-lyrics-dataset-for-static-music-emotion-recognition-2407.06060"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-keyword-spotting-from-mixed-speech-2407.06078</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-keyword-spotting-from-mixed-speech-2407.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-keyword-spotting-from-mixed-speech-2407.06078"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-with-pseudo-multi-label-birdcall-classification-for-ds-gt-birdclef-2024-2407.06291</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-with-pseudo-multi-label-birdcall-classification-for-ds-gt-birdclef-2024-2407.06291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-with-pseudo-multi-label-birdcall-classification-for-ds-gt-birdclef-2024-2407.06291"/></url>
<url><loc>https://scifaro.com/en/abs/homogeneous-speaker-features-for-on-the-fly-dysarthric-and-elderly-speaker-adaptation-2407.06310</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/homogeneous-speaker-features-for-on-the-fly-dysarthric-and-elderly-speaker-adaptation-2407.06310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/homogeneous-speaker-features-for-on-the-fly-dysarthric-and-elderly-speaker-adaptation-2407.06310"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-enhancement-by-integrating-inter-channel-and-band-features-with-dual-branch-conformer-2407.06524</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-enhancement-by-integrating-inter-channel-and-band-features-with-dual-branch-conformer-2407.06524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-enhancement-by-integrating-inter-channel-and-band-features-with-dual-branch-conformer-2407.06524"/></url>
<url><loc>https://scifaro.com/en/abs/audio-language-datasets-of-scenes-and-events-a-survey-2407.06947</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-language-datasets-of-scenes-and-events-a-survey-2407.06947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-language-datasets-of-scenes-and-events-a-survey-2407.06947"/></url>
<url><loc>https://scifaro.com/en/abs/speech-after-gender-a-trans-feminine-perspective-on-next-steps-for-speech-science-and-technology-2407.07235</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-after-gender-a-trans-feminine-perspective-on-next-steps-for-speech-science-and-technology-2407.07235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-after-gender-a-trans-feminine-perspective-on-next-steps-for-speech-science-and-technology-2407.07235"/></url>
<url><loc>https://scifaro.com/en/abs/simusoe-a-simulated-snoring-dataset-for-obstructive-sleep-apnea-hypopnea-syndrome-evaluation-during-wakefulness-2407.07397</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simusoe-a-simulated-snoring-dataset-for-obstructive-sleep-apnea-hypopnea-syndrome-evaluation-during-wakefulness-2407.07397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simusoe-a-simulated-snoring-dataset-for-obstructive-sleep-apnea-hypopnea-syndrome-evaluation-during-wakefulness-2407.07397"/></url>
<url><loc>https://scifaro.com/en/abs/stone-self-supervised-tonality-estimator-2407.07408</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stone-self-supervised-tonality-estimator-2407.07408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stone-self-supervised-tonality-estimator-2407.07408"/></url>
<url><loc>https://scifaro.com/en/abs/video-to-audio-generation-with-hidden-alignment-2407.07464</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video-to-audio-generation-with-hidden-alignment-2407.07464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video-to-audio-generation-with-hidden-alignment-2407.07464"/></url>
<url><loc>https://scifaro.com/en/abs/targeted-augmented-data-for-audio-deepfake-detection-2407.07598</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/targeted-augmented-data-for-audio-deepfake-detection-2407.07598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/targeted-augmented-data-for-audio-deepfake-detection-2407.07598"/></url>
<url><loc>https://scifaro.com/en/abs/samoye-zero-shot-singing-voice-conversion-model-based-on-feature-disentanglement-and-enhancement-2407.07728</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samoye-zero-shot-singing-voice-conversion-model-based-on-feature-disentanglement-and-enhancement-2407.07728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samoye-zero-shot-singing-voice-conversion-model-based-on-feature-disentanglement-and-enhancement-2407.07728"/></url>
<url><loc>https://scifaro.com/en/abs/rt-la-voce-real-time-low-snr-audio-visual-speech-enhancement-2407.07825</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rt-la-voce-real-time-low-snr-audio-visual-speech-enhancement-2407.07825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rt-la-voce-real-time-low-snr-audio-visual-speech-enhancement-2407.07825"/></url>
<url><loc>https://scifaro.com/en/abs/an-unsupervised-domain-adaptation-method-for-locating-manipulated-region-in-partially-fake-audio-2407.08239</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-unsupervised-domain-adaptation-method-for-locating-manipulated-region-in-partially-fake-audio-2407.08239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-unsupervised-domain-adaptation-method-for-locating-manipulated-region-in-partially-fake-audio-2407.08239"/></url>
<url><loc>https://scifaro.com/en/abs/let-network-decide-what-to-learn-symbolic-music-understanding-model-based-on-large-scale-adversarial-pre-training-2407.08306</loc><lastmod>2025-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/let-network-decide-what-to-learn-symbolic-music-understanding-model-based-on-large-scale-adversarial-pre-training-2407.08306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/let-network-decide-what-to-learn-symbolic-music-understanding-model-based-on-large-scale-adversarial-pre-training-2407.08306"/></url>
<url><loc>https://scifaro.com/en/abs/from-real-to-cloned-singer-identification-2407.08647</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-real-to-cloned-singer-identification-2407.08647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-real-to-cloned-singer-identification-2407.08647"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-constrained-on-room-impulse-response-characteristics-2407.08657</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-constrained-on-room-impulse-response-characteristics-2407.08657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-constrained-on-room-impulse-response-characteristics-2407.08657"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-voice-command-pipelines-for-drone-control-from-stt-and-llm-to-direct-classification-and-siamese-networks-2407.08658</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-voice-command-pipelines-for-drone-control-from-stt-and-llm-to-direct-classification-and-siamese-networks-2407.08658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-voice-command-pipelines-for-drone-control-from-stt-and-llm-to-direct-classification-and-siamese-networks-2407.08658"/></url>
<url><loc>https://scifaro.com/en/abs/elasticast-an-audio-spectrogram-transformer-for-all-length-and-resolutions-2407.08691</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/elasticast-an-audio-spectrogram-transformer-for-all-length-and-resolutions-2407.08691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/elasticast-an-audio-spectrogram-transformer-for-all-length-and-resolutions-2407.08691"/></url>
<url><loc>https://scifaro.com/en/abs/audio-spotforming-using-nonnegative-tensor-factorization-with-attractor-based-regularization-2407.08951</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-spotforming-using-nonnegative-tensor-factorization-with-attractor-based-regularization-2407.08951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-spotforming-using-nonnegative-tensor-factorization-with-attractor-based-regularization-2407.08951"/></url>
<url><loc>https://scifaro.com/en/abs/music-proofreading-with-refinpaint-where-and-how-to-modify-compositions-given-context-2407.09099</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-proofreading-with-refinpaint-where-and-how-to-modify-compositions-given-context-2407.09099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-proofreading-with-refinpaint-where-and-how-to-modify-compositions-given-context-2407.09099"/></url>
<url><loc>https://scifaro.com/en/abs/a-preliminary-investigation-on-flexible-singing-voice-synthesis-through-decomposed-framework-with-inferrable-features-2407.09346</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-preliminary-investigation-on-flexible-singing-voice-synthesis-through-decomposed-framework-with-inferrable-features-2407.09346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-preliminary-investigation-on-flexible-singing-voice-synthesis-through-decomposed-framework-with-inferrable-features-2407.09346"/></url>
<url><loc>https://scifaro.com/en/abs/ecvoice-audio-text-extraction-and-optimization-of-video-based-on-idioms-similarity-replacement-2407.09489</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecvoice-audio-text-extraction-and-optimization-of-video-based-on-idioms-similarity-replacement-2407.09489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecvoice-audio-text-extraction-and-optimization-of-video-based-on-idioms-similarity-replacement-2407.09489"/></url>
<url><loc>https://scifaro.com/en/abs/empowering-whisper-as-a-joint-multi-talker-and-target-talker-speech-recognition-system-2407.09817</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empowering-whisper-as-a-joint-multi-talker-and-target-talker-speech-recognition-system-2407.09817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empowering-whisper-as-a-joint-multi-talker-and-target-talker-speech-recognition-system-2407.09817"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-sv-adapting-whisper-for-low-data-resource-speaker-verification-2407.10048</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-sv-adapting-whisper-for-low-data-resource-speaker-verification-2407.10048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-sv-adapting-whisper-for-low-data-resource-speaker-verification-2407.10048"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-frame-level-embedding-learning-system-2407.10182</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-frame-level-embedding-learning-system-2407.10182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-frame-level-embedding-learning-system-2407.10182"/></url>
<url><loc>https://scifaro.com/en/abs/cuside-t-chunking-simulating-future-and-decoding-for-transducer-based-streaming-asr-2407.10255</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuside-t-chunking-simulating-future-and-decoding-for-transducer-based-streaming-asr-2407.10255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuside-t-chunking-simulating-future-and-decoding-for-transducer-based-streaming-asr-2407.10255"/></url>
<url><loc>https://scifaro.com/en/abs/the-interpretation-gap-in-text-to-music-generation-models-2407.10328</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interpretation-gap-in-text-to-music-generation-models-2407.10328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interpretation-gap-in-text-to-music-generation-models-2407.10328"/></url>
<url><loc>https://scifaro.com/en/abs/mutual-learning-for-acoustic-matching-and-dereverberation-via-visual-scene-driven-diffusion-2407.10373</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mutual-learning-for-acoustic-matching-and-dereverberation-via-visual-scene-driven-diffusion-2407.10373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mutual-learning-for-acoustic-matching-and-dereverberation-via-visual-scene-driven-diffusion-2407.10373"/></url>
<url><loc>https://scifaro.com/en/abs/masked-generative-video-to-audio-transformers-with-enhanced-synchronicity-2407.10387</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-generative-video-to-audio-transformers-with-enhanced-synchronicity-2407.10387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-generative-video-to-audio-transformers-with-enhanced-synchronicity-2407.10387"/></url>
<url><loc>https://scifaro.com/en/abs/ddfad-dataset-distillation-framework-for-audio-data-2407.10446</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddfad-dataset-distillation-framework-for-audio-data-2407.10446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddfad-dataset-distillation-framework-for-audio-data-2407.10446"/></url>
<url><loc>https://scifaro.com/en/abs/bandcondinet-parallel-transformers-based-conditional-popular-music-generation-with-multi-view-features-2407.10462</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandcondinet-parallel-transformers-based-conditional-popular-music-generation-with-multi-view-features-2407.10462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandcondinet-parallel-transformers-based-conditional-popular-music-generation-with-multi-view-features-2407.10462"/></url>
<url><loc>https://scifaro.com/en/abs/litefocus-accelerated-diffusion-inference-for-long-audio-synthesis-2407.10468</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/litefocus-accelerated-diffusion-inference-for-long-audio-synthesis-2407.10468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/litefocus-accelerated-diffusion-inference-for-long-audio-synthesis-2407.10468"/></url>
<url><loc>https://scifaro.com/en/abs/towards-zero-shot-amplifier-modeling-one-to-many-amplifier-modeling-via-tone-embedding-control-2407.10646</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-zero-shot-amplifier-modeling-one-to-many-amplifier-modeling-via-tone-embedding-control-2407.10646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-zero-shot-amplifier-modeling-one-to-many-amplifier-modeling-via-tone-embedding-control-2407.10646"/></url>
<url><loc>https://scifaro.com/en/abs/towards-enhanced-classification-of-abnormal-lung-sound-in-multi-breath-a-light-weight-multi-label-and-multi-head-attention-classification-method-2407.10828</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-enhanced-classification-of-abnormal-lung-sound-in-multi-breath-a-light-weight-multi-label-and-multi-head-attention-classification-method-2407.10828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-enhanced-classification-of-abnormal-lung-sound-in-multi-breath-a-light-weight-multi-label-and-multi-head-attention-classification-method-2407.10828"/></url>
<url><loc>https://scifaro.com/en/abs/a-pilot-study-of-gslm-based-simulation-of-foreign-accentuation-only-using-native-speech-corpora-2407.11370</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pilot-study-of-gslm-based-simulation-of-foreign-accentuation-only-using-native-speech-corpora-2407.11370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pilot-study-of-gslm-based-simulation-of-foreign-accentuation-only-using-native-speech-corpora-2407.11370"/></url>
<url><loc>https://scifaro.com/en/abs/mmsd-net-towards-multi-modal-stuttering-detection-2407.11492</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmsd-net-towards-multi-modal-stuttering-detection-2407.11492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmsd-net-towards-multi-modal-stuttering-detection-2407.11492"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-effect-of-label-topology-and-training-criterion-on-asr-performance-and-alignment-quality-2407.11641</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-effect-of-label-topology-and-training-criterion-on-asr-performance-and-alignment-quality-2407.11641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-effect-of-label-topology-and-training-criterion-on-asr-performance-and-alignment-quality-2407.11641"/></url>
<url><loc>https://scifaro.com/en/abs/audio-conditioning-for-music-generation-via-discrete-bottleneck-features-2407.12563</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-conditioning-for-music-generation-via-discrete-bottleneck-features-2407.12563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-conditioning-for-music-generation-via-discrete-bottleneck-features-2407.12563"/></url>
<url><loc>https://scifaro.com/en/abs/graphmuse-a-library-for-symbolic-music-graph-processing-2407.12671</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graphmuse-a-library-for-symbolic-music-graph-processing-2407.12671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graphmuse-a-library-for-symbolic-music-graph-processing-2407.12671"/></url>
<url><loc>https://scifaro.com/en/abs/pre-trained-foundation-model-representations-to-uncover-breathing-patterns-in-speech-2407.13035</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-trained-foundation-model-representations-to-uncover-breathing-patterns-in-speech-2407.13035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-trained-foundation-model-representations-to-uncover-breathing-patterns-in-speech-2407.13035"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-and-driving-human-body-soundfields-through-acoustic-primitives-2407.13083</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-and-driving-human-body-soundfields-through-acoustic-primitives-2407.13083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-and-driving-human-body-soundfields-through-acoustic-primitives-2407.13083"/></url>
<url><loc>https://scifaro.com/en/abs/divesound-llm-assisted-automatic-taxonomy-construction-for-diverse-audio-generation-2407.13198</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/divesound-llm-assisted-automatic-taxonomy-construction-for-diverse-audio-generation-2407.13198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/divesound-llm-assisted-automatic-taxonomy-construction-for-diverse-audio-generation-2407.13198"/></url>
<url><loc>https://scifaro.com/en/abs/underwater-acoustic-signal-denoising-algorithms-a-survey-of-the-state-of-the-art-2407.13264</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underwater-acoustic-signal-denoising-algorithms-a-survey-of-the-state-of-the-art-2407.13264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underwater-acoustic-signal-denoising-algorithms-a-survey-of-the-state-of-the-art-2407.13264"/></url>
<url><loc>https://scifaro.com/en/abs/how-private-is-low-frequency-speech-audio-in-the-wild-an-analysis-of-verbal-intelligibility-by-humans-and-machines-2407.13266</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-private-is-low-frequency-speech-audio-in-the-wild-an-analysis-of-verbal-intelligibility-by-humans-and-machines-2407.13266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-private-is-low-frequency-speech-audio-in-the-wild-an-analysis-of-verbal-intelligibility-by-humans-and-machines-2407.13266"/></url>
<url><loc>https://scifaro.com/en/abs/low-resourced-speech-recognition-for-iu-mien-language-via-weakly-supervised-phoneme-based-multilingual-pre-training-2407.13292</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resourced-speech-recognition-for-iu-mien-language-via-weakly-supervised-phoneme-based-multilingual-pre-training-2407.13292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resourced-speech-recognition-for-iu-mien-language-via-weakly-supervised-phoneme-based-multilingual-pre-training-2407.13292"/></url>
<url><loc>https://scifaro.com/en/abs/using-speech-foundational-models-in-loss-functions-for-hearing-aid-speech-enhancement-2407.13333</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-speech-foundational-models-in-loss-functions-for-hearing-aid-speech-enhancement-2407.13333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-speech-foundational-models-in-loss-functions-for-hearing-aid-speech-enhancement-2407.13333"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-barriers-to-the-use-of-marginalised-music-genres-in-ai-2407.13439</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-barriers-to-the-use-of-marginalised-music-genres-in-ai-2407.13439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-barriers-to-the-use-of-marginalised-music-genres-in-ai-2407.13439"/></url>
<url><loc>https://scifaro.com/en/abs/spontaneous-style-text-to-speech-synthesis-with-controllable-spontaneous-behaviors-based-on-language-models-2407.13509</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spontaneous-style-text-to-speech-synthesis-with-controllable-spontaneous-behaviors-based-on-language-models-2407.13509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spontaneous-style-text-to-speech-synthesis-with-controllable-spontaneous-behaviors-based-on-language-models-2407.13509"/></url>
<url><loc>https://scifaro.com/en/abs/braille-to-speech-generator-audio-generation-based-on-joint-fine-tuning-of-clip-and-fastspeech2-2407.14212</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/braille-to-speech-generator-audio-generation-based-on-joint-fine-tuning-of-clip-and-fastspeech2-2407.14212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/braille-to-speech-generator-audio-generation-based-on-joint-fine-tuning-of-clip-and-fastspeech2-2407.14212"/></url>
<url><loc>https://scifaro.com/en/abs/guitar-chord-diagram-suggestion-for-western-popular-music-2407.14260</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guitar-chord-diagram-suggestion-for-western-popular-music-2407.14260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guitar-chord-diagram-suggestion-for-western-popular-music-2407.14260"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-audio-captioning-with-encoder-level-knowledge-distillation-2407.14329</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-audio-captioning-with-encoder-level-knowledge-distillation-2407.14329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-audio-captioning-with-encoder-level-knowledge-distillation-2407.14329"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-zero-shot-audio-classification-using-sound-attribute-knowledge-from-large-language-models-2407.14355</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-zero-shot-audio-classification-using-sound-attribute-knowledge-from-large-language-models-2407.14355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-zero-shot-audio-classification-using-sound-attribute-knowledge-from-large-language-models-2407.14355"/></url>
<url><loc>https://scifaro.com/en/abs/stable-audio-open-2407.14358</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stable-audio-open-2407.14358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stable-audio-open-2407.14358"/></url>
<url><loc>https://scifaro.com/en/abs/towards-assessing-data-replication-in-music-generation-with-music-similarity-metrics-on-raw-audio-2407.14364</loc><lastmod>2025-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-assessing-data-replication-in-music-generation-with-music-similarity-metrics-on-raw-audio-2407.14364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-assessing-data-replication-in-music-generation-with-music-similarity-metrics-on-raw-audio-2407.14364"/></url>
<url><loc>https://scifaro.com/en/abs/morse-code-enabled-speech-recognition-for-individuals-with-visual-and-hearing-impairments-2407.14525</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/morse-code-enabled-speech-recognition-for-individuals-with-visual-and-hearing-impairments-2407.14525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/morse-code-enabled-speech-recognition-for-individuals-with-visual-and-hearing-impairments-2407.14525"/></url>
<url><loc>https://scifaro.com/en/abs/composer-s-assistant-2-interactive-multi-track-midi-infilling-with-fine-grained-user-control-2407.14700</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composer-s-assistant-2-interactive-multi-track-midi-infilling-with-fine-grained-user-control-2407.14700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composer-s-assistant-2-interactive-multi-track-midi-infilling-with-fine-grained-user-control-2407.14700"/></url>
<url><loc>https://scifaro.com/en/abs/musicongen-rhythm-and-chord-control-for-transformer-based-text-to-music-generation-2407.15060</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicongen-rhythm-and-chord-control-for-transformer-based-text-to-music-generation-2407.15060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicongen-rhythm-and-chord-control-for-transformer-based-text-to-music-generation-2407.15060"/></url>
<url><loc>https://scifaro.com/en/abs/explainability-paths-for-sustained-artistic-practice-with-ai-2407.15216</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainability-paths-for-sustained-artistic-practice-with-ai-2407.15216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainability-paths-for-sustained-artistic-practice-with-ai-2407.15216"/></url>
<url><loc>https://scifaro.com/en/abs/selm-enhancing-speech-emotion-recognition-for-out-of-domain-scenarios-2407.15300</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selm-enhancing-speech-emotion-recognition-for-out-of-domain-scenarios-2407.15300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selm-enhancing-speech-emotion-recognition-for-out-of-domain-scenarios-2407.15300"/></url>
<url><loc>https://scifaro.com/en/abs/computer-audition-from-task-specific-machine-learning-to-foundation-models-2407.15672</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computer-audition-from-task-specific-machine-learning-to-foundation-models-2407.15672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computer-audition-from-task-specific-machine-learning-to-foundation-models-2407.15672"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-utility-of-speech-and-audio-foundation-models-for-marmoset-call-analysis-2407.16417</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-utility-of-speech-and-audio-foundation-models-for-marmoset-call-analysis-2407.16417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-utility-of-speech-and-audio-foundation-models-for-marmoset-call-analysis-2407.16417"/></url>
<url><loc>https://scifaro.com/en/abs/audio-prompt-adapter-unleashing-music-editing-abilities-for-text-to-music-with-lightweight-finetuning-2407.16564</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-prompt-adapter-unleashing-music-editing-abilities-for-text-to-music-with-lightweight-finetuning-2407.16564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-prompt-adapter-unleashing-music-editing-abilities-for-text-to-music-with-lightweight-finetuning-2407.16564"/></url>
<url><loc>https://scifaro.com/en/abs/distortion-recovery-a-two-stage-method-for-guitar-effect-removal-2407.16639</loc><lastmod>2024-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distortion-recovery-a-two-stage-method-for-guitar-effect-removal-2407.16639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distortion-recovery-a-two-stage-method-for-guitar-effect-removal-2407.16639"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-vs-few-shot-multi-speaker-tts-using-pre-trained-czech-speecht5-model-2407.17167</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-vs-few-shot-multi-speaker-tts-using-pre-trained-czech-speecht5-model-2407.17167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-vs-few-shot-multi-speaker-tts-using-pre-trained-czech-speecht5-model-2407.17167"/></url>
<url><loc>https://scifaro.com/en/abs/speech-editing-a-summary-2407.17172</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-editing-a-summary-2407.17172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-editing-a-summary-2407.17172"/></url>
<url><loc>https://scifaro.com/en/abs/improved-symbolic-drum-style-classification-with-grammar-based-hierarchical-representations-2407.17536</loc><lastmod>2024-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-symbolic-drum-style-classification-with-grammar-based-hierarchical-representations-2407.17536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-symbolic-drum-style-classification-with-grammar-based-hierarchical-representations-2407.17536"/></url>
<url><loc>https://scifaro.com/en/abs/describe-where-you-are-improving-noise-robustness-for-speech-emotion-recognition-with-text-description-of-the-environment-2407.17716</loc><lastmod>2025-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/describe-where-you-are-improving-noise-robustness-for-speech-emotion-recognition-with-text-description-of-the-environment-2407.17716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/describe-where-you-are-improving-noise-robustness-for-speech-emotion-recognition-with-text-description-of-the-environment-2407.17716"/></url>
<url><loc>https://scifaro.com/en/abs/innovative-speech-based-deep-learning-approaches-for-parkinson-s-disease-classification-a-systematic-review-2407.17844</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/innovative-speech-based-deep-learning-approaches-for-parkinson-s-disease-classification-a-systematic-review-2407.17844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/innovative-speech-based-deep-learning-approaches-for-parkinson-s-disease-classification-a-systematic-review-2407.17844"/></url>
<url><loc>https://scifaro.com/en/abs/i-can-listen-but-cannot-read-an-evaluation-of-two-tower-multimodal-systems-for-instrument-recognition-2407.18058</loc><lastmod>2024-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-can-listen-but-cannot-read-an-evaluation-of-two-tower-multimodal-systems-for-instrument-recognition-2407.18058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-can-listen-but-cannot-read-an-evaluation-of-two-tower-multimodal-systems-for-instrument-recognition-2407.18058"/></url>
<url><loc>https://scifaro.com/en/abs/audio-entailment-assessing-deductive-reasoning-for-audio-understanding-2407.18062</loc><lastmod>2024-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-entailment-assessing-deductive-reasoning-for-audio-understanding-2407.18062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-entailment-assessing-deductive-reasoning-for-audio-understanding-2407.18062"/></url>
<url><loc>https://scifaro.com/en/abs/model-driven-heart-rate-estimation-and-heart-murmur-detection-based-on-phonocardiogram-2407.18424</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-driven-heart-rate-estimation-and-heart-murmur-detection-based-on-phonocardiogram-2407.18424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-driven-heart-rate-estimation-and-heart-murmur-detection-based-on-phonocardiogram-2407.18424"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-dysarthric-speech-recognition-for-unseen-speakers-via-prototype-based-adaptation-2407.18461</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-dysarthric-speech-recognition-for-unseen-speakers-via-prototype-based-adaptation-2407.18461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-dysarthric-speech-recognition-for-unseen-speakers-via-prototype-based-adaptation-2407.18461"/></url>
<url><loc>https://scifaro.com/en/abs/slim-style-linguistics-mismatch-model-for-generalized-audio-deepfake-detection-2407.18517</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slim-style-linguistics-mismatch-model-for-generalized-audio-deepfake-detection-2407.18517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slim-style-linguistics-mismatch-model-for-generalized-audio-deepfake-detection-2407.18517"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improving-nam-to-speech-synthesis-intelligibility-using-self-supervised-speech-models-2407.18541</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improving-nam-to-speech-synthesis-intelligibility-using-self-supervised-speech-models-2407.18541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improving-nam-to-speech-synthesis-intelligibility-using-self-supervised-speech-models-2407.18541"/></url>
<url><loc>https://scifaro.com/en/abs/speech-bandwidth-expansion-via-high-fidelity-generative-adversarial-networks-2407.18571</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-bandwidth-expansion-via-high-fidelity-generative-adversarial-networks-2407.18571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-bandwidth-expansion-via-high-fidelity-generative-adversarial-networks-2407.18571"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-tts-synthesized-data-for-efficient-development-of-keyword-spotting-model-2407.18879</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-tts-synthesized-data-for-efficient-development-of-keyword-spotting-model-2407.18879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-tts-synthesized-data-for-efficient-development-of-keyword-spotting-model-2407.18879"/></url>
<url><loc>https://scifaro.com/en/abs/implementation-and-applications-of-wakewords-integrated-with-speaker-recognition-a-case-study-2407.18985</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementation-and-applications-of-wakewords-integrated-with-speaker-recognition-a-case-study-2407.18985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementation-and-applications-of-wakewords-integrated-with-speaker-recognition-a-case-study-2407.18985"/></url>
<url><loc>https://scifaro.com/en/abs/ravss-robust-audio-visual-speech-separation-in-multi-speaker-scenarios-with-missing-visual-cues-2407.19224</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ravss-robust-audio-visual-speech-separation-in-multi-speaker-scenarios-with-missing-visual-cues-2407.19224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ravss-robust-audio-visual-speech-separation-in-multi-speaker-scenarios-with-missing-visual-cues-2407.19224"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-few-shot-class-incremental-learning-in-audio-classification-using-contrastive-representation-2407.19265</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-few-shot-class-incremental-learning-in-audio-classification-using-contrastive-representation-2407.19265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-few-shot-class-incremental-learning-in-audio-classification-using-contrastive-representation-2407.19265"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-and-reducing-the-synthetic-to-real-transfer-gap-in-music-information-retrieval-the-task-of-automatic-drum-transcription-2407.19823</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-and-reducing-the-synthetic-to-real-transfer-gap-in-music-information-retrieval-the-task-of-automatic-drum-transcription-2407.19823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-and-reducing-the-synthetic-to-real-transfer-gap-in-music-information-retrieval-the-task-of-automatic-drum-transcription-2407.19823"/></url>
<url><loc>https://scifaro.com/en/abs/wavespace-a-highly-explorable-wavetable-generator-2407.19862</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavespace-a-highly-explorable-wavetable-generator-2407.19862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavespace-a-highly-explorable-wavetable-generator-2407.19862"/></url>
<url><loc>https://scifaro.com/en/abs/practical-and-reproducible-symbolic-music-generation-by-large-language-models-with-structural-embeddings-2407.19900</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/practical-and-reproducible-symbolic-music-generation-by-large-language-models-with-structural-embeddings-2407.19900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/practical-and-reproducible-symbolic-music-generation-by-large-language-models-with-structural-embeddings-2407.19900"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-anti-spoofing-countermeasures-robustness-through-joint-optimization-and-transfer-learning-2407.20111</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-anti-spoofing-countermeasures-robustness-through-joint-optimization-and-transfer-learning-2407.20111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-anti-spoofing-countermeasures-robustness-through-joint-optimization-and-transfer-learning-2407.20111"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-driven-melody-harmonization-via-melodic-variation-and-functional-representation-2407.20176</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-driven-melody-harmonization-via-melodic-variation-and-functional-representation-2407.20176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-driven-melody-harmonization-via-melodic-variation-and-functional-representation-2407.20176"/></url>
<url><loc>https://scifaro.com/en/abs/futga-towards-fine-grained-music-understanding-through-temporally-enhanced-generative-augmentation-2407.20445</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/futga-towards-fine-grained-music-understanding-through-temporally-enhanced-generative-augmentation-2407.20445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/futga-towards-fine-grained-music-understanding-through-temporally-enhanced-generative-augmentation-2407.20445"/></url>
<url><loc>https://scifaro.com/en/abs/supercodec-a-neural-speech-codec-with-selective-back-projection-network-2407.20530</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supercodec-a-neural-speech-codec-with-selective-back-projection-network-2407.20530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supercodec-a-neural-speech-codec-with-selective-back-projection-network-2407.20530"/></url>
<url><loc>https://scifaro.com/en/abs/abusive-speech-detection-in-indic-languages-using-acoustic-features-2407.20808</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/abusive-speech-detection-in-indic-languages-using-acoustic-features-2407.20808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/abusive-speech-detection-in-indic-languages-using-acoustic-features-2407.20808"/></url>
<url><loc>https://scifaro.com/en/abs/picogen-generate-piano-covers-with-a-two-stage-approach-2407.20883</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/picogen-generate-piano-covers-with-a-two-stage-approach-2407.20883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/picogen-generate-piano-covers-with-a-two-stage-approach-2407.20883"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-driven-piano-music-generation-via-two-stage-disentanglement-and-functional-representation-2407.20955</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-driven-piano-music-generation-via-two-stage-disentanglement-and-functional-representation-2407.20955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-driven-piano-music-generation-via-two-stage-disentanglement-and-functional-representation-2407.20955"/></url>
<url><loc>https://scifaro.com/en/abs/contrasting-deep-learning-models-for-direct-respiratory-insufficiency-detection-versus-blood-oxygen-saturation-estimation-2407.20989</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrasting-deep-learning-models-for-direct-respiratory-insufficiency-detection-versus-blood-oxygen-saturation-estimation-2407.20989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrasting-deep-learning-models-for-direct-respiratory-insufficiency-detection-versus-blood-oxygen-saturation-estimation-2407.20989"/></url>
<url><loc>https://scifaro.com/en/abs/design-and-development-of-laughter-recognition-system-based-on-multimodal-fusion-and-deep-learning-2407.21391</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-and-development-of-laughter-recognition-system-based-on-multimodal-fusion-and-deep-learning-2407.21391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-and-development-of-laughter-recognition-system-based-on-multimodal-fusion-and-deep-learning-2407.21391"/></url>
<url><loc>https://scifaro.com/en/abs/can-llms-reason-in-music-an-evaluation-of-llms-capability-of-music-understanding-and-generation-2407.21531</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-llms-reason-in-music-an-evaluation-of-llms-capability-of-music-understanding-and-generation-2407.21531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-llms-reason-in-music-an-evaluation-of-llms-capability-of-music-understanding-and-generation-2407.21531"/></url>
<url><loc>https://scifaro.com/en/abs/robust-lossy-audio-compression-identification-2407.21545</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-lossy-audio-compression-identification-2407.21545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-lossy-audio-compression-identification-2407.21545"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-partially-spoofed-audio-localization-with-boundary-aware-attention-mechanism-2407.21611</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-partially-spoofed-audio-localization-with-boundary-aware-attention-mechanism-2407.21611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-partially-spoofed-audio-localization-with-boundary-aware-attention-mechanism-2407.21611"/></url>
<url><loc>https://scifaro.com/en/abs/between-the-ai-and-me-analysing-listeners-perspectives-on-ai-and-human-composed-progressive-metal-music-2407.21615</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/between-the-ai-and-me-analysing-listeners-perspectives-on-ai-and-human-composed-progressive-metal-music-2407.21615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/between-the-ai-and-me-analysing-listeners-perspectives-on-ai-and-human-composed-progressive-metal-music-2407.21615"/></url>
<url><loc>https://scifaro.com/en/abs/beat-this-accurate-beat-tracking-without-dbn-postprocessing-2407.21658</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beat-this-accurate-beat-tracking-without-dbn-postprocessing-2407.21658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beat-this-accurate-beat-tracking-without-dbn-postprocessing-2407.21658"/></url>
<url><loc>https://scifaro.com/en/abs/combining-audio-control-and-style-transfer-using-latent-diffusion-2408.00196</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-audio-control-and-style-transfer-using-latent-diffusion-2408.00196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-audio-control-and-style-transfer-using-latent-diffusion-2408.00196"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-prototype-refinement-for-ambiguous-speech-emotion-recognition-2408.00325</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-prototype-refinement-for-ambiguous-speech-emotion-recognition-2408.00325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-prototype-refinement-for-ambiguous-speech-emotion-recognition-2408.00325"/></url>
<url><loc>https://scifaro.com/en/abs/interaural-time-difference-loss-for-binaural-target-sound-extraction-2408.00344</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interaural-time-difference-loss-for-binaural-target-sound-extraction-2408.00344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interaural-time-difference-loss-for-binaural-target-sound-extraction-2408.00344"/></url>
<url><loc>https://scifaro.com/en/abs/towards-explainable-and-interpretable-musical-difficulty-estimation-a-parameter-efficient-approach-2408.00473</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-explainable-and-interpretable-musical-difficulty-estimation-a-parameter-efficient-approach-2408.00473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-explainable-and-interpretable-musical-difficulty-estimation-a-parameter-efficient-approach-2408.00473"/></url>
<url><loc>https://scifaro.com/en/abs/chordsync-conformer-based-alignment-of-chord-annotations-to-music-audio-2408.00674</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chordsync-conformer-based-alignment-of-chord-annotations-to-music-audio-2408.00674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chordsync-conformer-based-alignment-of-chord-annotations-to-music-audio-2408.00674"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-midi-format-piano-performance-generation-2408.00900</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-midi-format-piano-performance-generation-2408.00900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-midi-format-piano-performance-generation-2408.00900"/></url>
<url><loc>https://scifaro.com/en/abs/six-dragons-fly-again-reviving-15th-century-korean-court-music-with-transformers-and-novel-encoding-2408.01096</loc><lastmod>2024-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/six-dragons-fly-again-reviving-15th-century-korean-court-music-with-transformers-and-novel-encoding-2408.01096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/six-dragons-fly-again-reviving-15th-century-korean-court-music-with-transformers-and-novel-encoding-2408.01096"/></url>
<url><loc>https://scifaro.com/en/abs/nested-music-transformer-sequentially-decoding-compound-tokens-in-symbolic-music-and-audio-generation-2408.01180</loc><lastmod>2026-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nested-music-transformer-sequentially-decoding-compound-tokens-in-symbolic-music-and-audio-generation-2408.01180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nested-music-transformer-sequentially-decoding-compound-tokens-in-symbolic-music-and-audio-generation-2408.01180"/></url>
<url><loc>https://scifaro.com/en/abs/muchomusic-evaluating-music-understanding-in-multimodal-audio-language-models-2408.01337</loc><lastmod>2024-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muchomusic-evaluating-music-understanding-in-multimodal-audio-language-models-2408.01337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muchomusic-evaluating-music-understanding-in-multimodal-audio-language-models-2408.01337"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-cross-modal-attention-for-audio-visual-deepfake-detection-and-localization-2408.01532</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-cross-modal-attention-for-audio-visual-deepfake-detection-and-localization-2408.01532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-cross-modal-attention-for-audio-visual-deepfake-detection-and-localization-2408.01532"/></url>
<url><loc>https://scifaro.com/en/abs/picogen2-piano-cover-generation-with-transfer-learning-approach-and-weakly-aligned-data-2408.01551</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/picogen2-piano-cover-generation-with-transfer-learning-approach-and-weakly-aligned-data-2408.01551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/picogen2-piano-cover-generation-with-transfer-learning-approach-and-weakly-aligned-data-2408.01551"/></url>
<url><loc>https://scifaro.com/en/abs/generating-high-quality-symbolic-music-using-fine-grained-discriminators-2408.01696</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-high-quality-symbolic-music-using-fine-grained-discriminators-2408.01696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-high-quality-symbolic-music-using-fine-grained-discriminators-2408.01696"/></url>
<url><loc>https://scifaro.com/en/abs/why-perturbing-symbolic-music-is-necessary-fitting-the-distribution-of-never-used-notes-through-a-joint-probabilistic-diffusion-model-2408.01950</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-perturbing-symbolic-music-is-necessary-fitting-the-distribution-of-never-used-notes-through-a-joint-probabilistic-diffusion-model-2408.01950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-perturbing-symbolic-music-is-necessary-fitting-the-distribution-of-never-used-notes-through-a-joint-probabilistic-diffusion-model-2408.01950"/></url>
<url><loc>https://scifaro.com/en/abs/joint-learning-of-emotions-in-music-and-generalized-sounds-2408.02009</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-learning-of-emotions-in-music-and-generalized-sounds-2408.02009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-learning-of-emotions-in-music-and-generalized-sounds-2408.02009"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-based-chaining-cluster-for-multilingual-voice-face-association-2408.02025</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-based-chaining-cluster-for-multilingual-voice-face-association-2408.02025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-based-chaining-cluster-for-multilingual-voice-face-association-2408.02025"/></url>
<url><loc>https://scifaro.com/en/abs/dise-no-de-sonido-para-producciones-audiovisuales-e-historias-sonoras-en-el-aula-hacia-una-docencia-creativa-mediante-el-uso-de-herramientas-inteligentes-2408.02113</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dise-no-de-sonido-para-producciones-audiovisuales-e-historias-sonoras-en-el-aula-hacia-una-docencia-creativa-mediante-el-uso-de-herramientas-inteligentes-2408.02113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dise-no-de-sonido-para-producciones-audiovisuales-e-historias-sonoras-en-el-aula-hacia-una-docencia-creativa-mediante-el-uso-de-herramientas-inteligentes-2408.02113"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-to-optimize-inference-of-the-diart-speaker-diarization-pipeline-2408.02341</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-to-optimize-inference-of-the-diart-speaker-diarization-pipeline-2408.02341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-to-optimize-inference-of-the-diart-speaker-diarization-pipeline-2408.02341"/></url>
<url><loc>https://scifaro.com/en/abs/steer-by-prior-editing-of-symbolic-music-loops-2408.02434</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steer-by-prior-editing-of-symbolic-music-loops-2408.02434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steer-by-prior-editing-of-symbolic-music-loops-2408.02434"/></url>
<url><loc>https://scifaro.com/en/abs/stem-jepa-a-joint-embedding-predictive-architecture-for-musical-stem-compatibility-estimation-2408.02514</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stem-jepa-a-joint-embedding-predictive-architecture-for-musical-stem-compatibility-estimation-2408.02514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stem-jepa-a-joint-embedding-predictive-architecture-for-musical-stem-compatibility-estimation-2408.02514"/></url>
<url><loc>https://scifaro.com/en/abs/clustering-and-mining-accented-speech-for-inclusive-and-fair-speech-recognition-2408.02582</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clustering-and-mining-accented-speech-for-inclusive-and-fair-speech-recognition-2408.02582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clustering-and-mining-accented-speech-for-inclusive-and-fair-speech-recognition-2408.02582"/></url>
<url><loc>https://scifaro.com/en/abs/text-conditioned-symbolic-drumbeat-generation-using-latent-diffusion-models-2408.02711</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-conditioned-symbolic-drumbeat-generation-using-latent-diffusion-models-2408.02711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-conditioned-symbolic-drumbeat-generation-using-latent-diffusion-models-2408.02711"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-voice-identification-after-speech-resynthesis-using-ppg-2408.02712</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-voice-identification-after-speech-resynthesis-using-ppg-2408.02712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-voice-identification-after-speech-resynthesis-using-ppg-2408.02712"/></url>
<url><loc>https://scifaro.com/en/abs/grafx-an-open-source-library-for-audio-processing-graphs-in-pytorch-2408.03204</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/grafx-an-open-source-library-for-audio-processing-graphs-in-pytorch-2408.03204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/grafx-an-open-source-library-for-audio-processing-graphs-in-pytorch-2408.03204"/></url>
<url><loc>https://scifaro.com/en/abs/central-kurdish-text-to-speech-synthesis-with-novel-end-to-end-transformer-training-2408.03887</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/central-kurdish-text-to-speech-synthesis-with-novel-end-to-end-transformer-training-2408.03887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/central-kurdish-text-to-speech-synthesis-with-novel-end-to-end-transformer-training-2408.03887"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptation-for-quantised-end-to-end-asr-models-2408.03979</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptation-for-quantised-end-to-end-asr-models-2408.03979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptation-for-quantised-end-to-end-asr-models-2408.03979"/></url>
<url><loc>https://scifaro.com/en/abs/distil-dccrn-a-small-footprint-dccrn-leveraging-feature-based-knowledge-distillation-in-speech-enhancement-2408.04267</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distil-dccrn-a-small-footprint-dccrn-leveraging-feature-based-knowledge-distillation-in-speech-enhancement-2408.04267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distil-dccrn-a-small-footprint-dccrn-leveraging-feature-based-knowledge-distillation-in-speech-enhancement-2408.04267"/></url>
<url><loc>https://scifaro.com/en/abs/thegluenote-learned-representations-for-robust-and-flexible-note-alignment-2408.04309</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thegluenote-learned-representations-for-robust-and-flexible-note-alignment-2408.04309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thegluenote-learned-representations-for-robust-and-flexible-note-alignment-2408.04309"/></url>
<url><loc>https://scifaro.com/en/abs/neuralmultiling-a-novel-neural-architecture-search-for-smartphone-based-multilingual-speaker-verification-2408.04362</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuralmultiling-a-novel-neural-architecture-search-for-smartphone-based-multilingual-speaker-verification-2408.04362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuralmultiling-a-novel-neural-architecture-search-for-smartphone-based-multilingual-speaker-verification-2408.04362"/></url>
<url><loc>https://scifaro.com/en/abs/mullivc-multi-lingual-voice-conversion-with-cycle-consistency-2408.04708</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mullivc-multi-lingual-voice-conversion-with-cycle-consistency-2408.04708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mullivc-multi-lingual-voice-conversion-with-cycle-consistency-2408.04708"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-the-corpus-bias-problem-in-automatic-music-transcription-systems-2408.04737</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-the-corpus-bias-problem-in-automatic-music-transcription-systems-2408.04737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-the-corpus-bias-problem-in-automatic-music-transcription-systems-2408.04737"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-consistency-preserving-loss-and-perceptual-contrast-stretching-to-boost-ssl-based-speech-enhancement-2408.04773</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-consistency-preserving-loss-and-perceptual-contrast-stretching-to-boost-ssl-based-speech-enhancement-2408.04773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-consistency-preserving-loss-and-perceptual-contrast-stretching-to-boost-ssl-based-speech-enhancement-2408.04773"/></url>
<url><loc>https://scifaro.com/en/abs/hyper-recurrent-neural-network-condition-mechanisms-for-black-box-audio-effect-modeling-2408.04829</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyper-recurrent-neural-network-condition-mechanisms-for-black-box-audio-effect-modeling-2408.04829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyper-recurrent-neural-network-condition-mechanisms-for-black-box-audio-effect-modeling-2408.04829"/></url>
<url><loc>https://scifaro.com/en/abs/teadapter-supply-abundant-guidance-for-controllable-text-to-music-generation-2408.04865</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teadapter-supply-abundant-guidance-for-controllable-text-to-music-generation-2408.04865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teadapter-supply-abundant-guidance-for-controllable-text-to-music-generation-2408.04865"/></url>
<url><loc>https://scifaro.com/en/abs/acousaf-acoustic-sensing-based-atrial-fibrillation-detection-system-for-mobile-phones-2408.04912</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acousaf-acoustic-sensing-based-atrial-fibrillation-detection-system-for-mobile-phones-2408.04912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acousaf-acoustic-sensing-based-atrial-fibrillation-detection-system-for-mobile-phones-2408.04912"/></url>
<url><loc>https://scifaro.com/en/abs/midi-to-tab-guitar-tablature-inference-via-masked-language-modeling-2408.05024</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-to-tab-guitar-tablature-inference-via-masked-language-modeling-2408.05024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-to-tab-guitar-tablature-inference-via-masked-language-modeling-2408.05024"/></url>
<url><loc>https://scifaro.com/en/abs/seld-mamba-selective-state-space-model-for-sound-event-localization-and-detection-with-source-distance-estimation-2408.05057</loc><lastmod>2024-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seld-mamba-selective-state-space-model-for-sound-event-localization-and-detection-with-source-distance-estimation-2408.05057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seld-mamba-selective-state-space-model-for-sound-event-localization-and-detection-with-source-distance-estimation-2408.05057"/></url>
<url><loc>https://scifaro.com/en/abs/stream-based-active-learning-for-anomalous-sound-detection-in-machine-condition-monitoring-2408.05493</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stream-based-active-learning-for-anomalous-sound-detection-in-machine-condition-monitoring-2408.05493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stream-based-active-learning-for-anomalous-sound-detection-in-machine-condition-monitoring-2408.05493"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-general-disentanglement-based-speaker-anonymization-for-enhanced-emotion-preservation-2408.05928</loc><lastmod>2025-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-general-disentanglement-based-speaker-anonymization-for-enhanced-emotion-preservation-2408.05928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-general-disentanglement-based-speaker-anonymization-for-enhanced-emotion-preservation-2408.05928"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-surprisal-in-music-generation-via-information-content-curve-matching-2408.06022</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-surprisal-in-music-generation-via-information-content-curve-matching-2408.06022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-surprisal-in-music-generation-via-information-content-curve-matching-2408.06022"/></url>
<url><loc>https://scifaro.com/en/abs/pyneuralfx-a-python-package-for-neural-audio-effect-modeling-2408.06053</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pyneuralfx-a-python-package-for-neural-audio-effect-modeling-2408.06053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pyneuralfx-a-python-package-for-neural-audio-effect-modeling-2408.06053"/></url>
<url><loc>https://scifaro.com/en/abs/audio-enhancement-for-computer-audition-an-iterative-training-paradigm-using-sample-importance-2408.06264</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-enhancement-for-computer-audition-an-iterative-training-paradigm-using-sample-importance-2408.06264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-enhancement-for-computer-audition-an-iterative-training-paradigm-using-sample-importance-2408.06264"/></url>
<url><loc>https://scifaro.com/en/abs/fovnet-configurable-field-of-view-speech-enhancement-with-low-computation-and-distortion-for-smart-glasses-2408.06468</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fovnet-configurable-field-of-view-speech-enhancement-with-low-computation-and-distortion-for-smart-glasses-2408.06468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fovnet-configurable-field-of-view-speech-enhancement-with-low-computation-and-distortion-for-smart-glasses-2408.06468"/></url>
<url><loc>https://scifaro.com/en/abs/music2latent-consistency-autoencoders-for-latent-audio-compression-2408.06500</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music2latent-consistency-autoencoders-for-latent-audio-compression-2408.06500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music2latent-consistency-autoencoders-for-latent-audio-compression-2408.06500"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-speaker-identification-architectural-insights-from-ab-1-corpus-analysis-and-performance-evaluation-2408.06804</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-speaker-identification-architectural-insights-from-ab-1-corpus-analysis-and-performance-evaluation-2408.06804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-speaker-identification-architectural-insights-from-ab-1-corpus-analysis-and-performance-evaluation-2408.06804"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-variability-and-multi-viewed-self-supervised-representations-to-tackle-the-asvspoof5-deepfake-challenge-2408.06922</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-variability-and-multi-viewed-self-supervised-representations-to-tackle-the-asvspoof5-deepfake-challenge-2408.06922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-variability-and-multi-viewed-self-supervised-representations-to-tackle-the-asvspoof5-deepfake-challenge-2408.06922"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-and-audio-coding-modern-ai-technology-meets-traditional-codecs-2408.06954</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-and-audio-coding-modern-ai-technology-meets-traditional-codecs-2408.06954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-and-audio-coding-modern-ai-technology-meets-traditional-codecs-2408.06954"/></url>
<url><loc>https://scifaro.com/en/abs/content-and-style-aware-audio-driven-facial-animation-2408.07005</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-and-style-aware-audio-driven-facial-animation-2408.07005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-and-style-aware-audio-driven-facial-animation-2408.07005"/></url>
<url><loc>https://scifaro.com/en/abs/source-separation-of-multi-source-raw-music-using-a-residual-quantized-variational-autoencoder-2408.07020</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-separation-of-multi-source-raw-music-using-a-residual-quantized-variational-autoencoder-2408.07020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-separation-of-multi-source-raw-music-using-a-residual-quantized-variational-autoencoder-2408.07020"/></url>
<url><loc>https://scifaro.com/en/abs/psm-learning-probabilistic-embeddings-for-multi-scale-zero-shot-soundscape-mapping-2408.07050</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psm-learning-probabilistic-embeddings-for-multi-scale-zero-shot-soundscape-mapping-2408.07050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psm-learning-probabilistic-embeddings-for-multi-scale-zero-shot-soundscape-mapping-2408.07050"/></url>
<url><loc>https://scifaro.com/en/abs/a-theory-based-explainable-deep-learning-architecture-for-music-emotion-2408.07113</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-theory-based-explainable-deep-learning-architecture-for-music-emotion-2408.07113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-theory-based-explainable-deep-learning-architecture-for-music-emotion-2408.07113"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-dataset-notation-software-and-representation-for-computational-schenkerian-analysis-2408.07184</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-dataset-notation-software-and-representation-for-computational-schenkerian-analysis-2408.07184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-dataset-notation-software-and-representation-for-computational-schenkerian-analysis-2408.07184"/></url>
<url><loc>https://scifaro.com/en/abs/play-me-something-icy-practical-challenges-explainability-and-the-semantic-gap-in-generative-ai-music-2408.07224</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/play-me-something-icy-practical-challenges-explainability-and-the-semantic-gap-in-generative-ai-music-2408.07224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/play-me-something-icy-practical-challenges-explainability-and-the-semantic-gap-in-generative-ai-music-2408.07224"/></url>
<url><loc>https://scifaro.com/en/abs/dpsnn-spiking-neural-network-for-low-latency-streaming-speech-enhancement-2408.07388</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpsnn-spiking-neural-network-for-low-latency-streaming-speech-enhancement-2408.07388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpsnn-spiking-neural-network-for-low-latency-streaming-speech-enhancement-2408.07388"/></url>
<url><loc>https://scifaro.com/en/abs/optimising-mfcc-parameters-for-the-automatic-detection-of-respiratory-diseases-2408.07522</loc><lastmod>2026-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimising-mfcc-parameters-for-the-automatic-detection-of-respiratory-diseases-2408.07522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimising-mfcc-parameters-for-the-automatic-detection-of-respiratory-diseases-2408.07522"/></url>
<url><loc>https://scifaro.com/en/abs/periodwave-multi-period-flow-matching-for-high-fidelity-waveform-generation-2408.07547</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/periodwave-multi-period-flow-matching-for-high-fidelity-waveform-generation-2408.07547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/periodwave-multi-period-flow-matching-for-high-fidelity-waveform-generation-2408.07547"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-high-fidelity-waveform-generation-via-adversarial-flow-matching-optimization-2408.08019</loc><lastmod>2024-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-high-fidelity-waveform-generation-via-adversarial-flow-matching-optimization-2408.08019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-high-fidelity-waveform-generation-via-adversarial-flow-matching-optimization-2408.08019"/></url>
<url><loc>https://scifaro.com/en/abs/the-evolution-of-inharmonicity-and-noisiness-in-contemporary-popular-music-2408.08127</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-evolution-of-inharmonicity-and-noisiness-in-contemporary-popular-music-2408.08127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-evolution-of-inharmonicity-and-noisiness-in-contemporary-popular-music-2408.08127"/></url>
<url><loc>https://scifaro.com/en/abs/gaps-a-large-and-diverse-classical-guitar-dataset-and-benchmark-transcription-model-2408.08653</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaps-a-large-and-diverse-classical-guitar-dataset-and-benchmark-transcription-model-2408.08653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaps-a-large-and-diverse-classical-guitar-dataset-and-benchmark-transcription-model-2408.08653"/></url>
<url><loc>https://scifaro.com/en/abs/hsdreport-heart-sound-diagnosis-with-echocardiography-reports-2408.08669</loc><lastmod>2024-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hsdreport-heart-sound-diagnosis-with-echocardiography-reports-2408.08669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hsdreport-heart-sound-diagnosis-with-echocardiography-reports-2408.08669"/></url>
<url><loc>https://scifaro.com/en/abs/mat-sed-a-masked-audio-transformer-with-masked-reconstruction-based-pre-training-for-sound-event-detection-2408.08673</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mat-sed-a-masked-audio-transformer-with-masked-reconstruction-based-pre-training-for-sound-event-detection-2408.08673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mat-sed-a-masked-audio-transformer-with-masked-reconstruction-based-pre-training-for-sound-event-detection-2408.08673"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-autoregressive-audio-modeling-via-next-scale-prediction-2408.09027</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-autoregressive-audio-modeling-via-next-scale-prediction-2408.09027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-autoregressive-audio-modeling-via-next-scale-prediction-2408.09027"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audio-language-models-through-self-supervised-post-training-with-text-audio-pairs-2408.09269</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audio-language-models-through-self-supervised-post-training-with-text-audio-pairs-2408.09269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audio-language-models-through-self-supervised-post-training-with-text-audio-pairs-2408.09269"/></url>
<url><loc>https://scifaro.com/en/abs/a-transcription-prompt-based-efficient-audio-large-language-model-for-robust-speech-recognition-2408.09491</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transcription-prompt-based-efficient-audio-large-language-model-for-robust-speech-recognition-2408.09491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transcription-prompt-based-efficient-audio-large-language-model-for-robust-speech-recognition-2408.09491"/></url>
<url><loc>https://scifaro.com/en/abs/hear-your-face-face-based-voice-conversion-with-f0-estimation-2408.09802</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hear-your-face-face-based-voice-conversion-with-f0-estimation-2408.09802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hear-your-face-face-based-voice-conversion-with-f0-estimation-2408.09802"/></url>
<url><loc>https://scifaro.com/en/abs/szu-afs-antispoofing-system-for-the-asvspoof-5-challenge-2408.09933</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/szu-afs-antispoofing-system-for-the-asvspoof-5-challenge-2408.09933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/szu-afs-antispoofing-system-for-the-asvspoof-5-challenge-2408.09933"/></url>
<url><loc>https://scifaro.com/en/abs/convert-and-speak-zero-shot-accent-conversion-with-minimum-supervision-2408.10096</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convert-and-speak-zero-shot-accent-conversion-with-minimum-supervision-2408.10096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convert-and-speak-zero-shot-accent-conversion-with-minimum-supervision-2408.10096"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-voice-cloning-for-nepali-leveraging-transfer-learning-in-a-low-resource-language-2408.10128</loc><lastmod>2024-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-voice-cloning-for-nepali-leveraging-transfer-learning-in-a-low-resource-language-2408.10128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-voice-cloning-for-nepali-leveraging-transfer-learning-in-a-low-resource-language-2408.10128"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-in-audio-and-speech-processing-an-end-to-end-comprehensive-review-2408.10330</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-in-audio-and-speech-processing-an-end-to-end-comprehensive-review-2408.10330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-in-audio-and-speech-processing-an-end-to-end-comprehensive-review-2408.10330"/></url>
<url><loc>https://scifaro.com/en/abs/brewclip-a-bifurcated-representation-learning-framework-for-audio-visual-retrieval-2408.10383</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/brewclip-a-bifurcated-representation-learning-framework-for-audio-visual-retrieval-2408.10383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/brewclip-a-bifurcated-representation-learning-framework-for-audio-visual-retrieval-2408.10383"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-training-of-keyword-spotting-to-minimize-tts-data-overfitting-2408.10463</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-training-of-keyword-spotting-to-minimize-tts-data-overfitting-2408.10463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-training-of-keyword-spotting-to-minimize-tts-data-overfitting-2408.10463"/></url>
<url><loc>https://scifaro.com/en/abs/icsd-an-open-source-dataset-for-infant-cry-and-snoring-detection-2408.10561</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icsd-an-open-source-dataset-for-infant-cry-and-snoring-detection-2408.10561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icsd-an-open-source-dataset-for-infant-cry-and-snoring-detection-2408.10561"/></url>
<url><loc>https://scifaro.com/en/abs/dismix-disentangling-mixtures-of-musical-instruments-for-source-level-pitch-and-timbre-manipulation-2408.10807</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dismix-disentangling-mixtures-of-musical-instruments-for-source-level-pitch-and-timbre-manipulation-2408.10807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dismix-disentangling-mixtures-of-musical-instruments-for-source-level-pitch-and-timbre-manipulation-2408.10807"/></url>
<url><loc>https://scifaro.com/en/abs/a-noval-feature-via-color-quantisation-for-fake-audio-detection-2408.10849</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-noval-feature-via-color-quantisation-for-fake-audio-detection-2408.10849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-noval-feature-via-color-quantisation-for-fake-audio-detection-2408.10849"/></url>
<url><loc>https://scifaro.com/en/abs/eele-exploring-efficient-and-extensible-lora-integration-in-emotional-text-to-speech-2408.10852</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eele-exploring-efficient-and-extensible-lora-integration-in-emotional-text-to-speech-2408.10852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eele-exploring-efficient-and-extensible-lora-integration-in-emotional-text-to-speech-2408.10852"/></url>
<url><loc>https://scifaro.com/en/abs/does-current-deepfake-audio-detection-model-effectively-detect-alm-based-deepfake-audio-2408.10853</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-current-deepfake-audio-detection-model-effectively-detect-alm-based-deepfake-audio-2408.10853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-current-deepfake-audio-detection-model-effectively-detect-alm-based-deepfake-audio-2408.10853"/></url>
<url><loc>https://scifaro.com/en/abs/rage-music-classification-and-analysis-using-k-nearest-neighbour-random-forest-support-vector-machine-convolutional-neural-networks-and-gradient-boosting-2408.10864</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rage-music-classification-and-analysis-using-k-nearest-neighbour-random-forest-support-vector-machine-convolutional-neural-networks-and-gradient-boosting-2408.10864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rage-music-classification-and-analysis-using-k-nearest-neighbour-random-forest-support-vector-machine-convolutional-neural-networks-and-gradient-boosting-2408.10864"/></url>
<url><loc>https://scifaro.com/en/abs/audio-match-cutting-finding-and-creating-matching-audio-transitions-in-movies-and-videos-2408.10998</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-match-cutting-finding-and-creating-matching-audio-transitions-in-movies-and-videos-2408.10998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-match-cutting-finding-and-creating-matching-audio-transitions-in-movies-and-videos-2408.10998"/></url>
<url><loc>https://scifaro.com/en/abs/but-systems-and-analyses-for-the-asvspoof-5-challenge-2408.11152</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-systems-and-analyses-for-the-asvspoof-5-challenge-2408.11152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-systems-and-analyses-for-the-asvspoof-5-challenge-2408.11152"/></url>
<url><loc>https://scifaro.com/en/abs/ddsp-guitar-amp-interpretable-guitar-amplifier-modeling-2408.11405</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddsp-guitar-amp-interpretable-guitar-amplifier-modeling-2408.11405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddsp-guitar-amp-interpretable-guitar-amplifier-modeling-2408.11405"/></url>
<url><loc>https://scifaro.com/en/abs/improvement-speaker-similarity-for-zero-shot-any-to-any-voice-conversion-of-whispered-and-regular-speech-2408.11528</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvement-speaker-similarity-for-zero-shot-any-to-any-voice-conversion-of-whispered-and-regular-speech-2408.11528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvement-speaker-similarity-for-zero-shot-any-to-any-voice-conversion-of-whispered-and-regular-speech-2408.11528"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-noise-disentanglement-and-adversarial-training-framework-for-robust-speaker-verification-2408.11562</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-noise-disentanglement-and-adversarial-training-framework-for-robust-speaker-verification-2408.11562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-noise-disentanglement-and-adversarial-training-framework-for-robust-speaker-verification-2408.11562"/></url>
<url><loc>https://scifaro.com/en/abs/video-foley-two-stage-video-to-sound-generation-via-temporal-event-condition-for-foley-sound-2408.11915</loc><lastmod>2025-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video-foley-two-stage-video-to-sound-generation-via-temporal-event-condition-for-foley-sound-2408.11915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video-foley-two-stage-video-to-sound-generation-via-temporal-event-condition-for-foley-sound-2408.11915"/></url>
<url><loc>https://scifaro.com/en/abs/developing-vocal-system-impaired-patient-aimed-voice-quality-assessment-approach-using-asr-representation-included-multiple-features-2408.12279</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-vocal-system-impaired-patient-aimed-voice-quality-assessment-approach-using-asr-representation-included-multiple-features-2408.12279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-vocal-system-impaired-patient-aimed-voice-quality-assessment-approach-using-asr-representation-included-multiple-features-2408.12279"/></url>
<url><loc>https://scifaro.com/en/abs/self-learning-for-personalized-keyword-spotting-on-ultra-low-power-audio-sensors-2408.12481</loc><lastmod>2025-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-learning-for-personalized-keyword-spotting-on-ultra-low-power-audio-sensors-2408.12481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-learning-for-personalized-keyword-spotting-on-ultra-low-power-audio-sensors-2408.12481"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-time-variant-responses-of-optical-compressors-with-selective-state-space-models-2408.12549</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-time-variant-responses-of-optical-compressors-with-selective-state-space-models-2408.12549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-time-variant-responses-of-optical-compressors-with-selective-state-space-models-2408.12549"/></url>
<url><loc>https://scifaro.com/en/abs/melody-predominates-over-harmony-in-the-evolution-of-musical-scales-across-96-countries-2408.12633</loc><lastmod>2025-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-predominates-over-harmony-in-the-evolution-of-musical-scales-across-96-countries-2408.12633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-predominates-over-harmony-in-the-evolution-of-musical-scales-across-96-countries-2408.12633"/></url>
<url><loc>https://scifaro.com/en/abs/information-and-motor-constraints-shape-melodic-diversity-across-cultures-2408.12635</loc><lastmod>2025-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/information-and-motor-constraints-shape-melodic-diversity-across-cultures-2408.12635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/information-and-motor-constraints-shape-melodic-diversity-across-cultures-2408.12635"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-generative-modeling-of-melodic-vocal-contours-in-hindustani-classical-music-2408.12658</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-generative-modeling-of-melodic-vocal-contours-in-hindustani-classical-music-2408.12658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-generative-modeling-of-melodic-vocal-contours-in-hindustani-classical-music-2408.12658"/></url>
<url><loc>https://scifaro.com/en/abs/on-class-separability-pitfalls-in-audio-text-contrastive-zero-shot-learning-2408.13068</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-class-separability-pitfalls-in-audio-text-contrastive-zero-shot-learning-2408.13068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-class-separability-pitfalls-in-audio-text-contrastive-zero-shot-learning-2408.13068"/></url>
<url><loc>https://scifaro.com/en/abs/nest-self-supervised-fast-conformer-as-all-purpose-seasoning-to-speech-processing-tasks-2408.13106</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nest-self-supervised-fast-conformer-as-all-purpose-seasoning-to-speech-processing-tasks-2408.13106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nest-self-supervised-fast-conformer-as-all-purpose-seasoning-to-speech-processing-tasks-2408.13106"/></url>
<url><loc>https://scifaro.com/en/abs/eavit-external-attention-vision-transformer-for-audio-classification-2408.13201</loc><lastmod>2024-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eavit-external-attention-vision-transformer-for-audio-classification-2408.13201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eavit-external-attention-vision-transformer-for-audio-classification-2408.13201"/></url>
<url><loc>https://scifaro.com/en/abs/toward-improving-synthetic-audio-spoofing-detection-robustness-via-meta-learning-and-disentangled-training-with-adversarial-examples-2408.13341</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-improving-synthetic-audio-spoofing-detection-robustness-via-meta-learning-and-disentangled-training-with-adversarial-examples-2408.13341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-improving-synthetic-audio-spoofing-detection-robustness-via-meta-learning-and-disentangled-training-with-adversarial-examples-2408.13341"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-training-with-adversarial-examples-for-robust-small-footprint-keyword-spotting-2408.13355</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-training-with-adversarial-examples-for-robust-small-footprint-keyword-spotting-2408.13355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-training-with-adversarial-examples-for-robust-small-footprint-keyword-spotting-2408.13355"/></url>
<url><loc>https://scifaro.com/en/abs/streamaad-decoding-spatial-auditory-attention-with-a-streaming-architecture-2408.13522</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamaad-decoding-spatial-auditory-attention-with-a-streaming-architecture-2408.13522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamaad-decoding-spatial-auditory-attention-with-a-streaming-architecture-2408.13522"/></url>
<url><loc>https://scifaro.com/en/abs/studying-the-effect-of-audio-filters-in-pre-trained-models-for-environmental-sound-classification-2408.13644</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/studying-the-effect-of-audio-filters-in-pre-trained-models-for-environmental-sound-classification-2408.13644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/studying-the-effect-of-audio-filters-in-pre-trained-models-for-environmental-sound-classification-2408.13644"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-the-impact-of-splicing-artifacts-in-partially-fake-speech-signals-2408.13784</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-the-impact-of-splicing-artifacts-in-partially-fake-speech-signals-2408.13784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-the-impact-of-splicing-artifacts-in-partially-fake-speech-signals-2408.13784"/></url>
<url><loc>https://scifaro.com/en/abs/simplespeech-2-towards-simple-and-efficient-text-to-speech-with-flow-based-scalar-latent-transformer-diffusion-models-2408.13893</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simplespeech-2-towards-simple-and-efficient-text-to-speech-with-flow-based-scalar-latent-transformer-diffusion-models-2408.13893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simplespeech-2-towards-simple-and-efficient-text-to-speech-with-flow-based-scalar-latent-transformer-diffusion-models-2408.13893"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-self-motion-and-room-familiarity-on-sound-source-localization-in-virtual-environments-2408.13904</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-self-motion-and-room-familiarity-on-sound-source-localization-in-virtual-environments-2408.13904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-self-motion-and-room-familiarity-on-sound-source-localization-in-virtual-environments-2408.13904"/></url>
<url><loc>https://scifaro.com/en/abs/wav2small-distilling-wav2vec2-to-72k-parameters-for-low-resource-speech-emotion-recognition-2408.13920</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2small-distilling-wav2vec2-to-72k-parameters-for-low-resource-speech-emotion-recognition-2408.13920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2small-distilling-wav2vec2-to-72k-parameters-for-low-resource-speech-emotion-recognition-2408.13920"/></url>
<url><loc>https://scifaro.com/en/abs/a-preliminary-case-study-on-long-form-in-the-wild-audio-spoofing-detection-2408.14066</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-preliminary-case-study-on-long-form-in-the-wild-audio-spoofing-detection-2408.14066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-preliminary-case-study-on-long-form-in-the-wild-audio-spoofing-detection-2408.14066"/></url>
<url><loc>https://scifaro.com/en/abs/sonics-synthetic-or-not-identifying-counterfeit-songs-2408.14080</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonics-synthetic-or-not-identifying-counterfeit-songs-2408.14080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonics-synthetic-or-not-identifying-counterfeit-songs-2408.14080"/></url>
<url><loc>https://scifaro.com/en/abs/diminishing-domain-mismatch-for-dnn-based-acoustic-distance-estimation-via-stochastic-room-reverberation-models-2408.14213</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diminishing-domain-mismatch-for-dnn-based-acoustic-distance-estimation-via-stochastic-room-reverberation-models-2408.14213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diminishing-domain-mismatch-for-dnn-based-acoustic-distance-estimation-via-stochastic-room-reverberation-models-2408.14213"/></url>
<url><loc>https://scifaro.com/en/abs/foundation-models-for-music-a-survey-2408.14340</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foundation-models-for-music-a-survey-2408.14340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foundation-models-for-music-a-survey-2408.14340"/></url>
<url><loc>https://scifaro.com/en/abs/stylespeech-parameter-efficient-fine-tuning-for-pre-trained-controllable-text-to-speech-2408.14713</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylespeech-parameter-efficient-fine-tuning-for-pre-trained-controllable-text-to-speech-2408.14713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylespeech-parameter-efficient-fine-tuning-for-pre-trained-controllable-text-to-speech-2408.14713"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-machine-learning-for-sound-field-estimation-2408.14731</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-machine-learning-for-sound-field-estimation-2408.14731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-machine-learning-for-sound-field-estimation-2408.14731"/></url>
<url><loc>https://scifaro.com/en/abs/voicetailor-lightweight-plug-in-adapter-for-diffusion-based-personalized-text-to-speech-2408.14739</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicetailor-lightweight-plug-in-adapter-for-diffusion-based-personalized-text-to-speech-2408.14739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicetailor-lightweight-plug-in-adapter-for-diffusion-based-personalized-text-to-speech-2408.14739"/></url>
<url><loc>https://scifaro.com/en/abs/coopasd-cooperative-machine-anomalous-sound-detection-with-privacy-concerns-2408.14753</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coopasd-cooperative-machine-anomalous-sound-detection-with-privacy-concerns-2408.14753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coopasd-cooperative-machine-anomalous-sound-detection-with-privacy-concerns-2408.14753"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-self-supervised-audio-representations-for-data-efficient-acoustic-scene-classification-2408.14862</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-self-supervised-audio-representations-for-data-efficient-acoustic-scene-classification-2408.14862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-self-supervised-audio-representations-for-data-efficient-acoustic-scene-classification-2408.14862"/></url>
<url><loc>https://scifaro.com/en/abs/the-voxceleb-speaker-recognition-challenge-a-retrospective-2408.14886</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voxceleb-speaker-recognition-challenge-a-retrospective-2408.14886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voxceleb-speaker-recognition-challenge-a-retrospective-2408.14886"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-classification-system-for-coconut-maturity-levels-based-on-acoustic-signals-2408.14910</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-classification-system-for-coconut-maturity-levels-based-on-acoustic-signals-2408.14910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-classification-system-for-coconut-maturity-levels-based-on-acoustic-signals-2408.14910"/></url>
<url><loc>https://scifaro.com/en/abs/morphogenesis-of-sound-creates-acoustic-rainbows-2408.14953</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/morphogenesis-of-sound-creates-acoustic-rainbows-2408.14953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/morphogenesis-of-sound-creates-acoustic-rainbows-2408.14953"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-symbolic-music-arrangement-track-aware-reconstruction-and-structured-tokenization-2408.15176</loc><lastmod>2025-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-symbolic-music-arrangement-track-aware-reconstruction-and-structured-tokenization-2408.15176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-symbolic-music-arrangement-track-aware-reconstruction-and-structured-tokenization-2408.15176"/></url>
<url><loc>https://scifaro.com/en/abs/emoattack-utilizing-emotional-voice-conversion-for-speech-backdoor-attacks-on-deep-speech-classification-models-2408.15508</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emoattack-utilizing-emotional-voice-conversion-for-speech-backdoor-attacks-on-deep-speech-classification-models-2408.15508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emoattack-utilizing-emotional-voice-conversion-for-speech-backdoor-attacks-on-deep-speech-classification-models-2408.15508"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-pmfa-partial-multi-scale-feature-aggregation-for-speaker-verification-using-whisper-models-2408.15585</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-pmfa-partial-multi-scale-feature-aggregation-for-speaker-verification-using-whisper-models-2408.15585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-pmfa-partial-multi-scale-feature-aggregation-for-speaker-verification-using-whisper-models-2408.15585"/></url>
<url><loc>https://scifaro.com/en/abs/voxinstruct-expressive-human-instruction-to-speech-generation-with-unified-multilingual-codec-language-modelling-2408.15676</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxinstruct-expressive-human-instruction-to-speech-generation-with-unified-multilingual-codec-language-modelling-2408.15676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxinstruct-expressive-human-instruction-to-speech-generation-with-unified-multilingual-codec-language-modelling-2408.15676"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-automatic-multi-level-airway-collapse-monitoring-on-obstructive-sleep-apnea-patients-2408.16030</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-automatic-multi-level-airway-collapse-monitoring-on-obstructive-sleep-apnea-patients-2408.16030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-automatic-multi-level-airway-collapse-monitoring-on-obstructive-sleep-apnea-patients-2408.16030"/></url>
<url><loc>https://scifaro.com/en/abs/improving-generalization-of-speech-separation-in-real-world-scenarios-strategies-in-simulation-optimization-and-evaluation-2408.16126</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-generalization-of-speech-separation-in-real-world-scenarios-strategies-in-simulation-optimization-and-evaluation-2408.16126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-generalization-of-speech-separation-in-real-world-scenarios-strategies-in-simulation-optimization-and-evaluation-2408.16126"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-beam-search-for-language-model-based-text-to-speech-synthesis-2408.16373</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-beam-search-for-language-model-based-text-to-speech-synthesis-2408.16373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-beam-search-for-language-model-based-text-to-speech-synthesis-2408.16373"/></url>
<url><loc>https://scifaro.com/en/abs/rave-for-speech-efficient-voice-conversion-at-high-sampling-rates-2408.16546</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rave-for-speech-efficient-voice-conversion-at-high-sampling-rates-2408.16546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rave-for-speech-efficient-voice-conversion-at-high-sampling-rates-2408.16546"/></url>
<url><loc>https://scifaro.com/en/abs/axlstms-learning-self-supervised-audio-representations-with-xlstms-2408.16568</loc><lastmod>2025-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/axlstms-learning-self-supervised-audio-representations-with-xlstms-2408.16568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/axlstms-learning-self-supervised-audio-representations-with-xlstms-2408.16568"/></url>
<url><loc>https://scifaro.com/en/abs/towards-efficient-modelling-of-string-dynamics-a-comparison-of-state-space-and-koopman-based-deep-learning-methods-2408.16650</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-efficient-modelling-of-string-dynamics-a-comparison-of-state-space-and-koopman-based-deep-learning-methods-2408.16650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-efficient-modelling-of-string-dynamics-a-comparison-of-state-space-and-koopman-based-deep-learning-methods-2408.16650"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-speaker-profiles-for-impersonation-audio-detection-2408.17009</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-speaker-profiles-for-impersonation-audio-detection-2408.17009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-speaker-profiles-for-impersonation-audio-detection-2408.17009"/></url>
<url><loc>https://scifaro.com/en/abs/aasist3-kan-enhanced-aasist-speech-deepfake-detection-using-ssl-features-and-additional-regularization-for-the-asvspoof-2024-challenge-2408.17352</loc><lastmod>2026-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aasist3-kan-enhanced-aasist-speech-deepfake-detection-using-ssl-features-and-additional-regularization-for-the-asvspoof-2024-challenge-2408.17352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aasist3-kan-enhanced-aasist-speech-deepfake-detection-using-ssl-features-and-additional-regularization-for-the-asvspoof-2024-challenge-2408.17352"/></url>
<url><loc>https://scifaro.com/en/abs/hold-me-tight-stable-encoder-decoder-design-for-speech-enhancement-2408.17358</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hold-me-tight-stable-encoder-decoder-design-for-speech-enhancement-2408.17358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hold-me-tight-stable-encoder-decoder-design-for-speech-enhancement-2408.17358"/></url>
<url><loc>https://scifaro.com/en/abs/audio-enhancement-from-multiple-crowdsourced-recordings-a-simple-and-effective-baseline-2408.17434</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-enhancement-from-multiple-crowdsourced-recordings-a-simple-and-effective-baseline-2408.17434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-enhancement-from-multiple-crowdsourced-recordings-a-simple-and-effective-baseline-2408.17434"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-augmentation-an-unsupervised-learning-approach-for-keyword-spotting-in-speech-technology-2409.00356</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-augmentation-an-unsupervised-learning-approach-for-keyword-spotting-in-speech-technology-2409.00356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-augmentation-an-unsupervised-learning-approach-for-keyword-spotting-in-speech-technology-2409.00356"/></url>
<url><loc>https://scifaro.com/en/abs/density-adaptive-attention-based-speech-network-enhancing-feature-understanding-for-mental-health-disorders-2409.00391</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/density-adaptive-attention-based-speech-network-enhancing-feature-understanding-for-mental-health-disorders-2409.00391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/density-adaptive-attention-based-speech-network-enhancing-feature-understanding-for-mental-health-disorders-2409.00391"/></url>
<url><loc>https://scifaro.com/en/abs/multi-label-zero-shot-audio-classification-with-temporal-attention-2409.00408</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-label-zero-shot-audio-classification-with-temporal-attention-2409.00408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-label-zero-shot-audio-classification-with-temporal-attention-2409.00408"/></url>
<url><loc>https://scifaro.com/en/abs/flux-that-plays-music-2409.00587</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flux-that-plays-music-2409.00587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flux-that-plays-music-2409.00587"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-your-speech-style-a-novel-zero-shot-identity-disentanglement-face-based-voice-conversion-2409.00700</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-your-speech-style-a-novel-zero-shot-identity-disentanglement-face-based-voice-conversion-2409.00700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-your-speech-style-a-novel-zero-shot-identity-disentanglement-face-based-voice-conversion-2409.00700"/></url>
<url><loc>https://scifaro.com/en/abs/maskgct-zero-shot-text-to-speech-with-masked-generative-codec-transformer-2409.00750</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maskgct-zero-shot-text-to-speech-with-masked-generative-codec-transformer-2409.00750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maskgct-zero-shot-text-to-speech-with-masked-generative-codec-transformer-2409.00750"/></url>
<url><loc>https://scifaro.com/en/abs/serialized-speech-information-guidance-with-overlapped-encoding-separation-for-multi-speaker-automatic-speech-recognition-2409.00815</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serialized-speech-information-guidance-with-overlapped-encoding-separation-for-multi-speaker-automatic-speech-recognition-2409.00815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serialized-speech-information-guidance-with-overlapped-encoding-separation-for-multi-speaker-automatic-speech-recognition-2409.00815"/></url>
<url><loc>https://scifaro.com/en/abs/libriheavymix-a-20-000-hour-dataset-for-single-channel-reverberant-multi-talker-speech-separation-asr-and-speaker-diarization-2409.00819</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libriheavymix-a-20-000-hour-dataset-for-single-channel-reverberant-multi-talker-speech-separation-asr-and-speaker-diarization-2409.00819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libriheavymix-a-20-000-hour-dataset-for-single-channel-reverberant-multi-talker-speech-separation-asr-and-speaker-diarization-2409.00819"/></url>
<url><loc>https://scifaro.com/en/abs/mmt-bert-chord-aware-symbolic-music-generation-based-on-multitrack-music-transformer-and-musicbert-2409.00919</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmt-bert-chord-aware-symbolic-music-generation-based-on-multitrack-music-transformer-and-musicbert-2409.00919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmt-bert-chord-aware-symbolic-music-generation-based-on-multitrack-music-transformer-and-musicbert-2409.00919"/></url>
<url><loc>https://scifaro.com/en/abs/socodec-a-semantic-ordered-multi-stream-speech-codec-for-efficient-language-model-based-text-to-speech-synthesis-2409.00933</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/socodec-a-semantic-ordered-multi-stream-speech-codec-for-efficient-language-model-based-text-to-speech-synthesis-2409.00933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/socodec-a-semantic-ordered-multi-stream-speech-codec-for-efficient-language-model-based-text-to-speech-synthesis-2409.00933"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-synthetic-audio-conversations-generation-using-large-language-models-2409.00946</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-synthetic-audio-conversations-generation-using-large-language-models-2409.00946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-synthetic-audio-conversations-generation-using-large-language-models-2409.00946"/></url>
<url><loc>https://scifaro.com/en/abs/spectron-target-speaker-extraction-using-conditional-transformer-with-adversarial-refinement-2409.01352</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectron-target-speaker-extraction-using-conditional-transformer-with-adversarial-refinement-2409.01352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectron-target-speaker-extraction-using-conditional-transformer-with-adversarial-refinement-2409.01352"/></url>
<url><loc>https://scifaro.com/en/abs/effective-noise-aware-data-simulation-for-domain-adaptive-speech-enhancement-leveraging-dynamic-stochastic-perturbation-2409.01545</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-noise-aware-data-simulation-for-domain-adaptive-speech-enhancement-leveraging-dynamic-stochastic-perturbation-2409.01545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-noise-aware-data-simulation-for-domain-adaptive-speech-enhancement-leveraging-dynamic-stochastic-perturbation-2409.01545"/></url>
<url><loc>https://scifaro.com/en/abs/voxhakka-a-dialectally-diverse-multi-speaker-text-to-speech-system-for-taiwanese-hakka-2409.01548</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxhakka-a-dialectally-diverse-multi-speaker-text-to-speech-system-for-taiwanese-hakka-2409.01548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxhakka-a-dialectally-diverse-multi-speaker-text-to-speech-system-for-taiwanese-hakka-2409.01548"/></url>
<url><loc>https://scifaro.com/en/abs/pureformer-vc-non-parallel-one-shot-voice-conversion-with-pure-transformer-blocks-and-triplet-discriminative-training-2409.01668</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pureformer-vc-non-parallel-one-shot-voice-conversion-with-pure-transformer-blocks-and-triplet-discriminative-training-2409.01668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pureformer-vc-non-parallel-one-shot-voice-conversion-with-pure-transformer-blocks-and-triplet-discriminative-training-2409.01668"/></url>
<url><loc>https://scifaro.com/en/abs/ustc-kxdigit-system-description-for-asvspoof5-challenge-2409.01695</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ustc-kxdigit-system-description-for-asvspoof5-challenge-2409.01695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ustc-kxdigit-system-description-for-asvspoof5-challenge-2409.01695"/></url>
<url><loc>https://scifaro.com/en/abs/the-role-of-large-language-models-in-musicology-are-we-ready-to-trust-the-machines-2409.01864</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-role-of-large-language-models-in-musicology-are-we-ready-to-trust-the-machines-2409.01864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-role-of-large-language-models-in-musicology-are-we-ready-to-trust-the-machines-2409.01864"/></url>
<url><loc>https://scifaro.com/en/abs/activity-guided-industrial-anomalous-sound-detection-against-interferences-2409.01885</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/activity-guided-industrial-anomalous-sound-detection-against-interferences-2409.01885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/activity-guided-industrial-anomalous-sound-detection-against-interferences-2409.01885"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-order-preserved-optimal-transport-based-cross-modal-knowledge-transfer-learning-for-asr-2409.02239</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-order-preserved-optimal-transport-based-cross-modal-knowledge-transfer-learning-for-asr-2409.02239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-order-preserved-optimal-transport-based-cross-modal-knowledge-transfer-learning-for-asr-2409.02239"/></url>
<url><loc>https://scifaro.com/en/abs/fastvoicegrad-one-step-diffusion-based-voice-conversion-with-adversarial-conditional-diffusion-distillation-2409.02245</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastvoicegrad-one-step-diffusion-based-voice-conversion-with-adversarial-conditional-diffusion-distillation-2409.02245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastvoicegrad-one-step-diffusion-based-voice-conversion-with-adversarial-conditional-diffusion-distillation-2409.02245"/></url>
<url><loc>https://scifaro.com/en/abs/lstmse-net-long-short-term-speech-enhancement-network-for-audio-visual-speech-enhancement-2409.02266</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstmse-net-long-short-term-speech-enhancement-network-for-audio-visual-speech-enhancement-2409.02266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstmse-net-long-short-term-speech-enhancement-network-for-audio-visual-speech-enhancement-2409.02266"/></url>
<url><loc>https://scifaro.com/en/abs/musicmamba-a-dual-feature-modeling-approach-for-generating-chinese-traditional-music-with-modal-precision-2409.02421</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicmamba-a-dual-feature-modeling-approach-for-generating-chinese-traditional-music-with-modal-precision-2409.02421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicmamba-a-dual-feature-modeling-approach-for-generating-chinese-traditional-music-with-modal-precision-2409.02421"/></url>
<url><loc>https://scifaro.com/en/abs/neurospex-neuro-guided-speaker-extraction-with-cross-modal-attention-2409.02489</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neurospex-neuro-guided-speaker-extraction-with-cross-modal-attention-2409.02489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neurospex-neuro-guided-speaker-extraction-with-cross-modal-attention-2409.02489"/></url>
<url><loc>https://scifaro.com/en/abs/training-universal-vocoders-with-feature-smoothing-based-augmentation-methods-for-high-quality-tts-systems-2409.02517</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-universal-vocoders-with-feature-smoothing-based-augmentation-methods-for-high-quality-tts-systems-2409.02517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-universal-vocoders-with-feature-smoothing-based-augmentation-methods-for-high-quality-tts-systems-2409.02517"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-recording-condition-and-number-of-monitored-days-on-discriminative-power-of-the-daily-phonotrauma-index-2409.02800</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-recording-condition-and-number-of-monitored-days-on-discriminative-power-of-the-daily-phonotrauma-index-2409.02800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-recording-condition-and-number-of-monitored-days-on-discriminative-power-of-the-daily-phonotrauma-index-2409.02800"/></url>
<url><loc>https://scifaro.com/en/abs/multi-track-musicldm-towards-versatile-music-generation-with-latent-diffusion-model-2409.02845</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-track-musicldm-towards-versatile-music-generation-with-latent-diffusion-model-2409.02845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-track-musicldm-towards-versatile-music-generation-with-latent-diffusion-model-2409.02845"/></url>
<url><loc>https://scifaro.com/en/abs/latent-watermarking-of-audio-generative-models-2409.02915</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-watermarking-of-audio-generative-models-2409.02915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-watermarking-of-audio-generative-models-2409.02915"/></url>
<url><loc>https://scifaro.com/en/abs/sympac-scalable-symbolic-music-generation-with-prompts-and-constraints-2409.03055</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sympac-scalable-symbolic-music-generation-with-prompts-and-constraints-2409.03055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sympac-scalable-symbolic-music-generation-with-prompts-and-constraints-2409.03055"/></url>
<url><loc>https://scifaro.com/en/abs/fireredtts-a-foundation-text-to-speech-framework-for-industry-level-generative-speech-applications-2409.03283</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fireredtts-a-foundation-text-to-speech-framework-for-industry-level-generative-speech-applications-2409.03283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fireredtts-a-foundation-text-to-speech-framework-for-industry-level-generative-speech-applications-2409.03283"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-indoor-scene-depth-maps-from-ultrasonic-echoes-2409.03336</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-indoor-scene-depth-maps-from-ultrasonic-echoes-2409.03336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-indoor-scene-depth-maps-from-ultrasonic-echoes-2409.03336"/></url>
<url><loc>https://scifaro.com/en/abs/atennuate-optimized-real-time-speech-enhancement-with-deep-ssms-on-raw-audio-2409.03377</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atennuate-optimized-real-time-speech-enhancement-with-deep-ssms-on-raw-audio-2409.03377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atennuate-optimized-real-time-speech-enhancement-with-deep-ssms-on-raw-audio-2409.03377"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-laryngoscopic-video-analysis-for-assisted-diagnosis-of-vocal-fold-paralysis-2409.03597</loc><lastmod>2026-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-laryngoscopic-video-analysis-for-assisted-diagnosis-of-vocal-fold-paralysis-2409.03597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-laryngoscopic-video-analysis-for-assisted-diagnosis-of-vocal-fold-paralysis-2409.03597"/></url>
<url><loc>https://scifaro.com/en/abs/clustering-of-indonesian-and-western-gamelan-orchestras-through-machine-learning-of-performance-parameters-2409.03713</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clustering-of-indonesian-and-western-gamelan-orchestras-through-machine-learning-of-performance-parameters-2409.03713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clustering-of-indonesian-and-western-gamelan-orchestras-through-machine-learning-of-performance-parameters-2409.03713"/></url>
<url><loc>https://scifaro.com/en/abs/applications-and-advances-of-artificial-intelligence-in-music-generation-a-review-2409.03715</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applications-and-advances-of-artificial-intelligence-in-music-generation-a-review-2409.03715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applications-and-advances-of-artificial-intelligence-in-music-generation-a-review-2409.03715"/></url>
<url><loc>https://scifaro.com/en/abs/sample-efficient-diffusion-for-text-to-speech-synthesis-2409.03717</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-efficient-diffusion-for-text-to-speech-synthesis-2409.03717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-efficient-diffusion-for-text-to-speech-synthesis-2409.03717"/></url>
<url><loc>https://scifaro.com/en/abs/metabgm-dynamic-soundtrack-transformation-for-continuous-multi-scene-experiences-with-ambient-awareness-and-personalization-2409.03844</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metabgm-dynamic-soundtrack-transformation-for-continuous-multi-scene-experiences-with-ambient-awareness-and-personalization-2409.03844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metabgm-dynamic-soundtrack-transformation-for-continuous-multi-scene-experiences-with-ambient-awareness-and-personalization-2409.03844"/></url>
<url><loc>https://scifaro.com/en/abs/searching-for-effective-preprocessing-method-and-cnn-based-architecture-with-efficient-channel-attention-on-speech-emotion-recognition-2409.04007</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/searching-for-effective-preprocessing-method-and-cnn-based-architecture-with-efficient-channel-attention-on-speech-emotion-recognition-2409.04007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/searching-for-effective-preprocessing-method-and-cnn-based-architecture-with-efficient-channel-attention-on-speech-emotion-recognition-2409.04007"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-neural-audio-codecs-for-speech-language-model-based-speech-generation-2409.04016</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-neural-audio-codecs-for-speech-language-model-based-speech-generation-2409.04016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-neural-audio-codecs-for-speech-language-model-based-speech-generation-2409.04016"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-contrastive-learning-and-self-training-for-multimodal-emotion-recognition-with-limited-labeled-samples-2409.04447</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-contrastive-learning-and-self-training-for-multimodal-emotion-recognition-with-limited-labeled-samples-2409.04447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-contrastive-learning-and-self-training-for-multimodal-emotion-recognition-with-limited-labeled-samples-2409.04447"/></url>
<url><loc>https://scifaro.com/en/abs/mel-roformer-for-vocal-separation-and-vocal-melody-transcription-2409.04702</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-roformer-for-vocal-separation-and-vocal-melody-transcription-2409.04702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-roformer-for-vocal-separation-and-vocal-melody-transcription-2409.04702"/></url>
<url><loc>https://scifaro.com/en/abs/pb-lrdwws-system-for-the-slt-2024-low-resource-dysarthria-wake-up-word-spotting-challenge-2409.04799</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pb-lrdwws-system-for-the-slt-2024-low-resource-dysarthria-wake-up-word-spotting-challenge-2409.04799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pb-lrdwws-system-for-the-slt-2024-low-resource-dysarthria-wake-up-word-spotting-challenge-2409.04799"/></url>
<url><loc>https://scifaro.com/en/abs/flow-tsvad-target-speaker-voice-activity-detection-via-latent-flow-matching-2409.04859</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flow-tsvad-target-speaker-voice-activity-detection-via-latent-flow-matching-2409.04859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flow-tsvad-target-speaker-voice-activity-detection-via-latent-flow-matching-2409.04859"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-efficient-breath-sound-removal-in-studio-audio-recordings-2409.04949</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-efficient-breath-sound-removal-in-studio-audio-recordings-2409.04949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-efficient-breath-sound-removal-in-studio-audio-recordings-2409.04949"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-neural-networks-architectures-for-spring-reverb-modelling-2409.04953</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-neural-networks-architectures-for-spring-reverb-modelling-2409.04953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-neural-networks-architectures-for-spring-reverb-modelling-2409.04953"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-the-prosody-and-semantic-information-with-pre-trained-model-for-in-context-learning-based-zero-shot-voice-conversion-2409.05004</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-the-prosody-and-semantic-information-with-pre-trained-model-for-in-context-learning-based-zero-shot-voice-conversion-2409.05004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-the-prosody-and-semantic-information-with-pre-trained-model-for-in-context-learning-based-zero-shot-voice-conversion-2409.05004"/></url>
<url><loc>https://scifaro.com/en/abs/audio-guided-fusion-techniques-for-multimodal-emotion-analysis-2409.05007</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-guided-fusion-techniques-for-multimodal-emotion-analysis-2409.05007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-guided-fusion-techniques-for-multimodal-emotion-analysis-2409.05007"/></url>
<url><loc>https://scifaro.com/en/abs/deep-generic-representations-for-domain-generalized-anomalous-sound-detection-2409.05035</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-generic-representations-for-domain-generalized-anomalous-sound-detection-2409.05035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-generic-representations-for-domain-generalized-anomalous-sound-detection-2409.05035"/></url>
<url><loc>https://scifaro.com/en/abs/the-first-cadenza-challenges-using-machine-learning-competitions-to-improve-music-for-listeners-with-a-hearing-loss-2409.05095</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-first-cadenza-challenges-using-machine-learning-competitions-to-improve-music-for-listeners-with-a-hearing-loss-2409.05095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-first-cadenza-challenges-using-machine-learning-competitions-to-improve-music-for-listeners-with-a-hearing-loss-2409.05095"/></url>
<url><loc>https://scifaro.com/en/abs/better-spanish-emotion-recognition-in-the-wild-bringing-attention-to-deep-spectrum-voice-analysis-2409.05148</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/better-spanish-emotion-recognition-in-the-wild-bringing-attention-to-deep-spectrum-voice-analysis-2409.05148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/better-spanish-emotion-recognition-in-the-wild-bringing-attention-to-deep-spectrum-voice-analysis-2409.05148"/></url>
<url><loc>https://scifaro.com/en/abs/machine-anomalous-sound-detection-using-spectral-temporal-modulation-representations-derived-from-machine-specific-filterbanks-2409.05319</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-anomalous-sound-detection-using-spectral-temporal-modulation-representations-derived-from-machine-specific-filterbanks-2409.05319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-anomalous-sound-detection-using-spectral-temporal-modulation-representations-derived-from-machine-specific-filterbanks-2409.05319"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speaker-diarization-current-databases-approaches-and-challenges-2409.05659</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speaker-diarization-current-databases-approaches-and-challenges-2409.05659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speaker-diarization-current-databases-approaches-and-challenges-2409.05659"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-latency-in-asr-systems-a-methodological-perspective-for-real-time-use-2409.05674</loc><lastmod>2025-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-latency-in-asr-systems-a-methodological-perspective-for-real-time-use-2409.05674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-latency-in-asr-systems-a-methodological-perspective-for-real-time-use-2409.05674"/></url>
<url><loc>https://scifaro.com/en/abs/vector-quantized-diffusion-model-based-speech-bandwidth-extension-2409.05784</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-quantized-diffusion-model-based-speech-bandwidth-extension-2409.05784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-quantized-diffusion-model-based-speech-bandwidth-extension-2409.05784"/></url>
<url><loc>https://scifaro.com/en/abs/pdaf-a-phonetic-debiasing-attention-framework-for-speaker-verification-2409.05799</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pdaf-a-phonetic-debiasing-attention-framework-for-speaker-verification-2409.05799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pdaf-a-phonetic-debiasing-attention-framework-for-speaker-verification-2409.05799"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-learning-of-transformer-based-audio-deepfake-detection-2409.05924</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-learning-of-transformer-based-audio-deepfake-detection-2409.05924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-learning-of-transformer-based-audio-deepfake-detection-2409.05924"/></url>
<url><loc>https://scifaro.com/en/abs/musical-chords-a-novel-java-algorithm-and-app-utility-to-enumerate-chord-progressions-adhering-to-music-theory-guidelines-2409.06024</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-chords-a-novel-java-algorithm-and-app-utility-to-enumerate-chord-progressions-adhering-to-music-theory-guidelines-2409.06024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-chords-a-novel-java-algorithm-and-app-utility-to-enumerate-chord-progressions-adhering-to-music-theory-guidelines-2409.06024"/></url>
<url><loc>https://scifaro.com/en/abs/songcreator-lyrics-based-universal-song-generation-2409.06029</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songcreator-lyrics-based-universal-song-generation-2409.06029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songcreator-lyrics-based-universal-song-generation-2409.06029"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-causal-cues-strengthening-spoofed-audio-detection-with-human-discernible-linguistic-features-2409.06033</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-causal-cues-strengthening-spoofed-audio-detection-with-human-discernible-linguistic-features-2409.06033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-causal-cues-strengthening-spoofed-audio-detection-with-human-discernible-linguistic-features-2409.06033"/></url>
<url><loc>https://scifaro.com/en/abs/latent-diffusion-bridges-for-unsupervised-musical-audio-timbre-transfer-2409.06096</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-diffusion-bridges-for-unsupervised-musical-audio-timbre-transfer-2409.06096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-diffusion-bridges-for-unsupervised-musical-audio-timbre-transfer-2409.06096"/></url>
<url><loc>https://scifaro.com/en/abs/draw-an-audio-leveraging-multi-instruction-for-video-to-audio-synthesis-2409.06135</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/draw-an-audio-leveraging-multi-instruction-for-video-to-audio-synthesis-2409.06135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/draw-an-audio-leveraging-multi-instruction-for-video-to-audio-synthesis-2409.06135"/></url>
<url><loc>https://scifaro.com/en/abs/dense-dynamic-embedding-causal-target-speech-extraction-2409.06136</loc><lastmod>2024-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dense-dynamic-embedding-causal-target-speech-extraction-2409.06136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dense-dynamic-embedding-causal-target-speech-extraction-2409.06136"/></url>
<url><loc>https://scifaro.com/en/abs/mtda-hsed-mutual-assistance-tuning-and-dual-branch-aggregating-for-heterogeneous-sound-event-detection-2409.06196</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mtda-hsed-mutual-assistance-tuning-and-dual-branch-aggregating-for-heterogeneous-sound-event-detection-2409.06196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mtda-hsed-mutual-assistance-tuning-and-dual-branch-aggregating-for-heterogeneous-sound-event-detection-2409.06196"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-temporal-understanding-in-audio-question-answering-for-large-audio-language-models-2409.06223</loc><lastmod>2024-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-temporal-understanding-in-audio-question-answering-for-large-audio-language-models-2409.06223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-temporal-understanding-in-audio-question-answering-for-large-audio-language-models-2409.06223"/></url>
<url><loc>https://scifaro.com/en/abs/robustsvc-hubert-based-melody-extractor-and-adversarial-learning-for-robust-singing-voice-conversion-2409.06237</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustsvc-hubert-based-melody-extractor-and-adversarial-learning-for-robust-singing-voice-conversion-2409.06237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustsvc-hubert-based-melody-extractor-and-adversarial-learning-for-robust-singing-voice-conversion-2409.06237"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-band-split-mamba-2-network-for-music-separation-2409.06245</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-band-split-mamba-2-network-for-music-separation-2409.06245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-band-split-mamba-2-network-for-music-separation-2409.06245"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-approach-for-chord-conditioned-song-generation-2409.06307</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-chord-conditioned-song-generation-2409.06307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-chord-conditioned-song-generation-2409.06307"/></url>
<url><loc>https://scifaro.com/en/abs/voicewukong-benchmarking-deepfake-voice-detection-2409.06348</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicewukong-benchmarking-deepfake-voice-detection-2409.06348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicewukong-benchmarking-deepfake-voice-detection-2409.06348"/></url>
<url><loc>https://scifaro.com/en/abs/soft-acoustic-curvature-sensor-design-and-development-2409.06395</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-acoustic-curvature-sensor-design-and-development-2409.06395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-acoustic-curvature-sensor-design-and-development-2409.06395"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-emotional-text-to-speech-controllability-with-natural-language-guidance-through-contrastive-learning-and-diffusion-models-2409.06451</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-emotional-text-to-speech-controllability-with-natural-language-guidance-through-contrastive-learning-and-diffusion-models-2409.06451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-emotional-text-to-speech-controllability-with-natural-language-guidance-through-contrastive-learning-and-diffusion-models-2409.06451"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-beamformer-for-multi-channel-speech-enhancement-2409.06456</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-beamformer-for-multi-channel-speech-enhancement-2409.06456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-beamformer-for-multi-channel-speech-enhancement-2409.06456"/></url>
<url><loc>https://scifaro.com/en/abs/sines-transient-noise-neural-modeling-of-piano-notes-2409.06513</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sines-transient-noise-neural-modeling-of-piano-notes-2409.06513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sines-transient-noise-neural-modeling-of-piano-notes-2409.06513"/></url>
<url><loc>https://scifaro.com/en/abs/mowe-audio-multitask-audiollms-with-mixture-of-weak-encoders-2409.06635</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mowe-audio-multitask-audiollms-with-mixture-of-weak-encoders-2409.06635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mowe-audio-multitask-audiollms-with-mixture-of-weak-encoders-2409.06635"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-sub-genre-classification-for-mainstage-dance-music-2409.06690</loc><lastmod>2025-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-sub-genre-classification-for-mainstage-dance-music-2409.06690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-sub-genre-classification-for-mainstage-dance-music-2409.06690"/></url>
<url><loc>https://scifaro.com/en/abs/the-voicemos-challenge-2024-beyond-speech-quality-prediction-2409.07001</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voicemos-challenge-2024-beyond-speech-quality-prediction-2409.07001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voicemos-challenge-2024-beyond-speech-quality-prediction-2409.07001"/></url>
<url><loc>https://scifaro.com/en/abs/improving-anomalous-sound-detection-via-low-rank-adaptation-fine-tuning-of-pre-trained-audio-models-2409.07016</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-anomalous-sound-detection-via-low-rank-adaptation-fine-tuning-of-pre-trained-audio-models-2409.07016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-anomalous-sound-detection-via-low-rank-adaptation-fine-tuning-of-pre-trained-audio-models-2409.07016"/></url>
<url><loc>https://scifaro.com/en/abs/developing-a-framework-for-sonifying-variational-quantum-algorithms-implications-for-music-composition-2409.07104</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-a-framework-for-sonifying-variational-quantum-algorithms-implications-for-music-composition-2409.07104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-a-framework-for-sonifying-variational-quantum-algorithms-implications-for-music-composition-2409.07104"/></url>
<url><loc>https://scifaro.com/en/abs/linear-time-complexity-conformers-with-summarymixing-for-streaming-speech-recognition-2409.07165</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linear-time-complexity-conformers-with-summarymixing-for-streaming-speech-recognition-2409.07165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linear-time-complexity-conformers-with-summarymixing-for-streaming-speech-recognition-2409.07165"/></url>
<url><loc>https://scifaro.com/en/abs/analytic-class-incremental-learning-for-sound-source-localization-with-privacy-protection-2409.07224</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analytic-class-incremental-learning-for-sound-source-localization-with-privacy-protection-2409.07224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analytic-class-incremental-learning-for-sound-source-localization-with-privacy-protection-2409.07224"/></url>
<url><loc>https://scifaro.com/en/abs/muskits-espnet-a-comprehensive-toolkit-for-singing-voice-synthesis-in-new-paradigm-2409.07226</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muskits-espnet-a-comprehensive-toolkit-for-singing-voice-synthesis-in-new-paradigm-2409.07226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muskits-espnet-a-comprehensive-toolkit-for-singing-voice-synthesis-in-new-paradigm-2409.07226"/></url>
<url><loc>https://scifaro.com/en/abs/manatts-persian-a-recipe-for-creating-tts-datasets-for-lower-resource-languages-2409.07259</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manatts-persian-a-recipe-for-creating-tts-datasets-for-lower-resource-languages-2409.07259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manatts-persian-a-recipe-for-creating-tts-datasets-for-lower-resource-languages-2409.07259"/></url>
<url><loc>https://scifaro.com/en/abs/cross-dialect-text-to-speech-in-pitch-accent-language-incorporating-multi-dialect-phoneme-level-bert-2409.07265</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-dialect-text-to-speech-in-pitch-accent-language-incorporating-multi-dialect-phoneme-level-bert-2409.07265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-dialect-text-to-speech-in-pitch-accent-language-incorporating-multi-dialect-phoneme-level-bert-2409.07265"/></url>
<url><loc>https://scifaro.com/en/abs/salmon-a-suite-for-acoustic-language-model-evaluation-2409.07437</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salmon-a-suite-for-acoustic-language-model-evaluation-2409.07437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salmon-a-suite-for-acoustic-language-model-evaluation-2409.07437"/></url>
<url><loc>https://scifaro.com/en/abs/flexible-control-in-symbolic-music-generation-via-musical-metadata-2409.07467</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexible-control-in-symbolic-music-generation-via-musical-metadata-2409.07467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexible-control-in-symbolic-music-generation-via-musical-metadata-2409.07467"/></url>
<url><loc>https://scifaro.com/en/abs/flowsep-language-queried-sound-separation-with-rectified-flow-matching-2409.07614</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowsep-language-queried-sound-separation-with-rectified-flow-matching-2409.07614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowsep-language-queried-sound-separation-with-rectified-flow-matching-2409.07614"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-paintings-and-music-exploring-emotion-based-music-generation-through-paintings-2409.07827</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-paintings-and-music-exploring-emotion-based-music-generation-through-paintings-2409.07827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-paintings-and-music-exploring-emotion-based-music-generation-through-paintings-2409.07827"/></url>
<url><loc>https://scifaro.com/en/abs/tselm-target-speaker-extraction-using-discrete-tokens-and-language-models-2409.07841</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tselm-target-speaker-extraction-using-discrete-tokens-and-language-models-2409.07841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tselm-target-speaker-extraction-using-discrete-tokens-and-language-models-2409.07841"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-sing-voice-conversion-built-upon-clustering-based-phoneme-representations-2409.08039</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-sing-voice-conversion-built-upon-clustering-based-phoneme-representations-2409.08039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-sing-voice-conversion-built-upon-clustering-based-phoneme-representations-2409.08039"/></url>
<url><loc>https://scifaro.com/en/abs/apollo-band-sequence-modeling-for-high-quality-audio-restoration-2409.08514</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apollo-band-sequence-modeling-for-high-quality-audio-restoration-2409.08514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apollo-band-sequence-modeling-for-high-quality-audio-restoration-2409.08514"/></url>
<url><loc>https://scifaro.com/en/abs/lhq-svc-lightweight-and-high-quality-singing-voice-conversion-modeling-2409.08583</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lhq-svc-lightweight-and-high-quality-singing-voice-conversion-modeling-2409.08583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lhq-svc-lightweight-and-high-quality-singing-voice-conversion-modeling-2409.08583"/></url>
<url><loc>https://scifaro.com/en/abs/domain-invariant-representation-learning-of-bird-sounds-2409.08589</loc><lastmod>2026-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-invariant-representation-learning-of-bird-sounds-2409.08589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-invariant-representation-learning-of-bird-sounds-2409.08589"/></url>
<url><loc>https://scifaro.com/en/abs/la-rag-enhancing-llm-based-asr-accuracy-with-retrieval-augmented-generation-2409.08597</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/la-rag-enhancing-llm-based-asr-accuracy-with-retrieval-augmented-generation-2409.08597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/la-rag-enhancing-llm-based-asr-accuracy-with-retrieval-augmented-generation-2409.08597"/></url>
<url><loc>https://scifaro.com/en/abs/sta-v2a-video-to-audio-generation-with-semantic-and-temporal-alignment-2409.08601</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sta-v2a-video-to-audio-generation-with-semantic-and-temporal-alignment-2409.08601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sta-v2a-video-to-audio-generation-with-semantic-and-temporal-alignment-2409.08601"/></url>
<url><loc>https://scifaro.com/en/abs/taptotab-video-based-guitar-tabs-generation-using-ai-and-audio-analysis-2409.08618</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taptotab-video-based-guitar-tabs-generation-using-ai-and-audio-analysis-2409.08618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taptotab-video-based-guitar-tabs-generation-using-ai-and-audio-analysis-2409.08618"/></url>
<url><loc>https://scifaro.com/en/abs/rhythmic-foley-a-framework-for-seamless-audio-visual-alignment-in-video-to-audio-synthesis-2409.08628</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythmic-foley-a-framework-for-seamless-audio-visual-alignment-in-video-to-audio-synthesis-2409.08628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythmic-foley-a-framework-for-seamless-audio-visual-alignment-in-video-to-audio-synthesis-2409.08628"/></url>
<url><loc>https://scifaro.com/en/abs/lmac-td-producing-time-domain-explanations-for-audio-classifiers-2409.08655</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lmac-td-producing-time-domain-explanations-for-audio-classifiers-2409.08655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lmac-td-producing-time-domain-explanations-for-audio-classifiers-2409.08655"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-disentanglement-in-a-phoneme-level-speech-codec-for-prosody-modeling-2409.08664</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-disentanglement-in-a-phoneme-level-speech-codec-for-prosody-modeling-2409.08664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-disentanglement-in-a-phoneme-level-speech-codec-for-prosody-modeling-2409.08664"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-identification-of-individual-animals-with-hierarchical-contrastive-learning-2409.08673</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-identification-of-individual-animals-with-hierarchical-contrastive-learning-2409.08673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-identification-of-individual-animals-with-hierarchical-contrastive-learning-2409.08673"/></url>
<url><loc>https://scifaro.com/en/abs/dfadd-the-diffusion-and-flow-matching-based-audio-deepfake-dataset-2409.08731</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dfadd-the-diffusion-and-flow-matching-based-audio-deepfake-dataset-2409.08731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dfadd-the-diffusion-and-flow-matching-based-audio-deepfake-dataset-2409.08731"/></url>
<url><loc>https://scifaro.com/en/abs/energy-consumption-trends-in-sound-event-detection-systems-2409.08763</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/energy-consumption-trends-in-sound-event-detection-systems-2409.08763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/energy-consumption-trends-in-sound-event-detection-systems-2409.08763"/></url>
<url><loc>https://scifaro.com/en/abs/biomimetic-frontend-for-differentiable-audio-processing-2409.08997</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/biomimetic-frontend-for-differentiable-audio-processing-2409.08997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/biomimetic-frontend-for-differentiable-audio-processing-2409.08997"/></url>
<url><loc>https://scifaro.com/en/abs/towards-leveraging-contrastively-pretrained-neural-audio-embeddings-for-recommender-tasks-2409.09026</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-leveraging-contrastively-pretrained-neural-audio-embeddings-for-recommender-tasks-2409.09026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-leveraging-contrastively-pretrained-neural-audio-embeddings-for-recommender-tasks-2409.09026"/></url>
<url><loc>https://scifaro.com/en/abs/accentbox-towards-high-fidelity-zero-shot-accent-generation-2409.09098</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accentbox-towards-high-fidelity-zero-shot-accent-generation-2409.09098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accentbox-towards-high-fidelity-zero-shot-accent-generation-2409.09098"/></url>
<url><loc>https://scifaro.com/en/abs/seed-music-a-unified-framework-for-high-quality-and-controlled-music-generation-2409.09214</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seed-music-a-unified-framework-for-high-quality-and-controlled-music-generation-2409.09214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seed-music-a-unified-framework-for-high-quality-and-controlled-music-generation-2409.09214"/></url>
<url><loc>https://scifaro.com/en/abs/audio-text-retrieval-with-transformer-based-hierarchical-alignment-and-disentangled-cross-modal-representation-2409.09256</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-text-retrieval-with-transformer-based-hierarchical-alignment-and-disentangled-cross-modal-representation-2409.09256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-text-retrieval-with-transformer-based-hierarchical-alignment-and-disentangled-cross-modal-representation-2409.09256"/></url>
<url><loc>https://scifaro.com/en/abs/m-3-v-a-multi-modal-multi-view-approach-for-device-directed-speech-detection-2409.09284</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m-3-v-a-multi-modal-multi-view-approach-for-device-directed-speech-detection-2409.09284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m-3-v-a-multi-modal-multi-view-approach-for-device-directed-speech-detection-2409.09284"/></url>
<url><loc>https://scifaro.com/en/abs/dsclap-domain-specific-contrastive-language-audio-pre-training-2409.09289</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsclap-domain-specific-contrastive-language-audio-pre-training-2409.09289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsclap-domain-specific-contrastive-language-audio-pre-training-2409.09289"/></url>
<url><loc>https://scifaro.com/en/abs/subband-splitting-simple-efficient-and-effective-technique-for-solving-block-permutation-problem-in-determined-blind-source-separation-2409.09294</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subband-splitting-simple-efficient-and-effective-technique-for-solving-block-permutation-problem-in-determined-blind-source-separation-2409.09294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subband-splitting-simple-efficient-and-effective-technique-for-solving-block-permutation-problem-in-determined-blind-source-separation-2409.09294"/></url>
<url><loc>https://scifaro.com/en/abs/the-t05-system-for-the-voicemos-challenge-2024-transfer-learning-from-deep-image-classifier-to-naturalness-mos-prediction-of-high-quality-synthetic-speech-2409.09305</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-t05-system-for-the-voicemos-challenge-2024-transfer-learning-from-deep-image-classifier-to-naturalness-mos-prediction-of-high-quality-synthetic-speech-2409.09305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-t05-system-for-the-voicemos-challenge-2024-transfer-learning-from-deep-image-classifier-to-naturalness-mos-prediction-of-high-quality-synthetic-speech-2409.09305"/></url>
<url><loc>https://scifaro.com/en/abs/egocentric-speaker-classification-in-child-adult-dyadic-interactions-from-sensing-to-computational-modeling-2409.09340</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/egocentric-speaker-classification-in-child-adult-dyadic-interactions-from-sensing-to-computational-modeling-2409.09340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/egocentric-speaker-classification-in-child-adult-dyadic-interactions-from-sensing-to-computational-modeling-2409.09340"/></url>
<url><loc>https://scifaro.com/en/abs/macst-multi-accent-speech-synthesis-via-text-transliteration-for-accent-conversion-2409.09352</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/macst-multi-accent-speech-synthesis-via-text-transliteration-for-accent-conversion-2409.09352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/macst-multi-accent-speech-synthesis-via-text-transliteration-for-accent-conversion-2409.09352"/></url>
<url><loc>https://scifaro.com/en/abs/joint-semantic-knowledge-distillation-and-masked-acoustic-modeling-for-full-band-speech-restoration-with-improved-intelligibility-2409.09357</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-semantic-knowledge-distillation-and-masked-acoustic-modeling-for-full-band-speech-restoration-with-improved-intelligibility-2409.09357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-semantic-knowledge-distillation-and-masked-acoustic-modeling-for-full-band-speech-restoration-with-improved-intelligibility-2409.09357"/></url>
<url><loc>https://scifaro.com/en/abs/prevailing-research-areas-for-music-ai-in-the-era-of-foundation-models-2409.09378</loc><lastmod>2026-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prevailing-research-areas-for-music-ai-in-the-era-of-foundation-models-2409.09378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prevailing-research-areas-for-music-ai-in-the-era-of-foundation-models-2409.09378"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-ez-python-only-espnet-for-easy-fine-tuning-and-integration-2409.09506</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-ez-python-only-espnet-for-easy-fine-tuning-and-integration-2409.09506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-ez-python-only-espnet-for-easy-fine-tuning-and-integration-2409.09506"/></url>
<url><loc>https://scifaro.com/en/abs/explaining-deep-learning-embeddings-for-speech-emotion-recognition-by-predicting-interpretable-acoustic-features-2409.09511</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explaining-deep-learning-embeddings-for-speech-emotion-recognition-by-predicting-interpretable-acoustic-features-2409.09511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explaining-deep-learning-embeddings-for-speech-emotion-recognition-by-predicting-interpretable-acoustic-features-2409.09511"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-and-multi-modal-emotion-recognition-in-reverberant-environment-2409.09545</loc><lastmod>2025-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-and-multi-modal-emotion-recognition-in-reverberant-environment-2409.09545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-and-multi-modal-emotion-recognition-in-reverberant-environment-2409.09545"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effectiveness-of-enrollment-speech-augmentation-for-target-speaker-extraction-2409.09589</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effectiveness-of-enrollment-speech-augmentation-for-target-speaker-extraction-2409.09589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effectiveness-of-enrollment-speech-augmentation-for-target-speaker-extraction-2409.09589"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-foundation-models-for-music-understanding-2409.09601</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-foundation-models-for-music-understanding-2409.09601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-foundation-models-for-music-understanding-2409.09601"/></url>
<url><loc>https://scifaro.com/en/abs/compositional-audio-representation-learning-2409.09619</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compositional-audio-representation-learning-2409.09619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compositional-audio-representation-learning-2409.09619"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-acoustic-few-shot-classification-2409.09647</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-acoustic-few-shot-classification-2409.09647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-acoustic-few-shot-classification-2409.09647"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-video-to-audio-mapper-with-visual-scene-detection-2409.09823</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-video-to-audio-mapper-with-visual-scene-detection-2409.09823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-video-to-audio-mapper-with-visual-scene-detection-2409.09823"/></url>
<url><loc>https://scifaro.com/en/abs/diffatr-diffusion-based-generative-modeling-for-audio-text-retrieval-2409.10025</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffatr-diffusion-based-generative-modeling-for-audio-text-retrieval-2409.10025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffatr-diffusion-based-generative-modeling-for-audio-text-retrieval-2409.10025"/></url>
<url><loc>https://scifaro.com/en/abs/audio-driven-reinforcement-learning-for-head-orientation-in-naturalistic-environments-2409.10048</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-driven-reinforcement-learning-for-head-orientation-in-naturalistic-environments-2409.10048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-driven-reinforcement-learning-for-head-orientation-in-naturalistic-environments-2409.10048"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-contrastive-learning-for-source-speaker-tracing-2409.10072</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-contrastive-learning-for-source-speaker-tracing-2409.10072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-contrastive-learning-for-source-speaker-tracing-2409.10072"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-dysarthria-wake-up-word-spotting-an-end-to-end-approach-for-slt-2024-lrdwws-challenge-2409.10076</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-dysarthria-wake-up-word-spotting-an-end-to-end-approach-for-slt-2024-lrdwws-challenge-2409.10076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-dysarthria-wake-up-word-spotting-an-end-to-end-approach-for-slt-2024-lrdwws-challenge-2409.10076"/></url>
<url><loc>https://scifaro.com/en/abs/musiclime-explainable-multimodal-music-understanding-2409.10496</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musiclime-explainable-multimodal-music-understanding-2409.10496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musiclime-explainable-multimodal-music-understanding-2409.10496"/></url>
<url><loc>https://scifaro.com/en/abs/pdmx-a-large-scale-public-domain-musicxml-dataset-for-symbolic-music-processing-2409.10831</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pdmx-a-large-scale-public-domain-musicxml-dataset-for-symbolic-music-processing-2409.10831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pdmx-a-large-scale-public-domain-musicxml-dataset-for-symbolic-music-processing-2409.10831"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-for-analysis-of-police-radio-communication-2409.10858</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-for-analysis-of-police-radio-communication-2409.10858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-for-analysis-of-police-radio-communication-2409.10858"/></url>
<url><loc>https://scifaro.com/en/abs/single-stage-tts-with-masked-audio-token-modeling-and-semantic-knowledge-distillation-2409.11003</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-stage-tts-with-masked-audio-token-modeling-and-semantic-knowledge-distillation-2409.11003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-stage-tts-with-masked-audio-token-modeling-and-semantic-knowledge-distillation-2409.11003"/></url>
<url><loc>https://scifaro.com/en/abs/high-resolution-speech-restoration-with-latent-diffusion-model-2409.11145</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-resolution-speech-restoration-with-latent-diffusion-model-2409.11145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-resolution-speech-restoration-with-latent-diffusion-model-2409.11145"/></url>
<url><loc>https://scifaro.com/en/abs/learning-source-disentanglement-in-neural-audio-codec-2409.11228</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-source-disentanglement-in-neural-audio-codec-2409.11228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-source-disentanglement-in-neural-audio-codec-2409.11228"/></url>
<url><loc>https://scifaro.com/en/abs/the-sounds-of-home-a-speech-removed-residential-audio-dataset-for-sound-event-detection-2409.11262</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sounds-of-home-a-speech-removed-residential-audio-dataset-for-sound-event-detection-2409.11262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sounds-of-home-a-speech-removed-residential-audio-dataset-for-sound-event-detection-2409.11262"/></url>
<url><loc>https://scifaro.com/en/abs/lc-protonets-multi-label-few-shot-learning-for-world-music-audio-tagging-2409.11264</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lc-protonets-multi-label-few-shot-learning-for-world-music-audio-tagging-2409.11264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lc-protonets-multi-label-few-shot-learning-for-world-music-audio-tagging-2409.11264"/></url>
<url><loc>https://scifaro.com/en/abs/learning-spatially-aware-language-and-audio-embeddings-2409.11369</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-spatially-aware-language-and-audio-embeddings-2409.11369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-spatially-aware-language-and-audio-embeddings-2409.11369"/></url>
<url><loc>https://scifaro.com/en/abs/machine-listening-in-a-neonatal-intensive-care-unit-2409.11439</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-listening-in-a-neonatal-intensive-care-unit-2409.11439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-listening-in-a-neonatal-intensive-care-unit-2409.11439"/></url>
<url><loc>https://scifaro.com/en/abs/augment-drop-swap-improving-diversity-in-llm-captions-for-efficient-music-text-representation-learning-2409.11498</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augment-drop-swap-improving-diversity-in-llm-captions-for-efficient-music-text-representation-learning-2409.11498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augment-drop-swap-improving-diversity-in-llm-captions-for-efficient-music-text-representation-learning-2409.11498"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-from-coarse-to-fine-improving-neural-codec-language-model-via-multi-scale-speech-coding-and-generation-2409.11630</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-from-coarse-to-fine-improving-neural-codec-language-model-via-multi-scale-speech-coding-and-generation-2409.11630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-from-coarse-to-fine-improving-neural-codec-language-model-via-multi-scale-speech-coding-and-generation-2409.11630"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-native-speaker-shadowing-for-nonnative-speech-assessment-with-latent-speech-representations-2409.11742</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-native-speaker-shadowing-for-nonnative-speech-assessment-with-latent-speech-representations-2409.11742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-native-speaker-shadowing-for-nonnative-speech-assessment-with-latent-speech-representations-2409.11742"/></url>
<url><loc>https://scifaro.com/en/abs/salt-standardized-audio-event-label-taxonomy-2409.11746</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salt-standardized-audio-event-label-taxonomy-2409.11746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salt-standardized-audio-event-label-taxonomy-2409.11746"/></url>
<url><loc>https://scifaro.com/en/abs/meteor-melody-aware-texture-controllable-symbolic-orchestral-music-generation-via-transformer-vae-2409.11753</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meteor-melody-aware-texture-controllable-symbolic-orchestral-music-generation-via-transformer-vae-2409.11753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meteor-melody-aware-texture-controllable-symbolic-orchestral-music-generation-via-transformer-vae-2409.11753"/></url>
<url><loc>https://scifaro.com/en/abs/sound-based-spin-estimation-in-table-tennis-dataset-and-real-time-classification-pipeline-2409.11760</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-based-spin-estimation-in-table-tennis-dataset-and-real-time-classification-pipeline-2409.11760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-based-spin-estimation-in-table-tennis-dataset-and-real-time-classification-pipeline-2409.11760"/></url>
<url><loc>https://scifaro.com/en/abs/dpi-tts-directional-patch-interaction-for-fast-converging-and-style-temporal-modeling-in-text-to-speech-2409.11835</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpi-tts-directional-patch-interaction-for-fast-converging-and-style-temporal-modeling-in-text-to-speech-2409.11835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpi-tts-directional-patch-interaction-for-fast-converging-and-style-temporal-modeling-in-text-to-speech-2409.11835"/></url>
<url><loc>https://scifaro.com/en/abs/m2r-whisper-multi-stage-and-multi-scale-retrieval-augmentation-for-enhancing-whisper-2409.11889</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2r-whisper-multi-stage-and-multi-scale-retrieval-augmentation-for-enhancing-whisper-2409.11889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2r-whisper-multi-stage-and-multi-scale-retrieval-augmentation-for-enhancing-whisper-2409.11889"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-of-experts-fusion-for-fake-audio-detection-using-frozen-wav2vec-2-0-2409.11909</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-of-experts-fusion-for-fake-audio-detection-using-frozen-wav2vec-2-0-2409.11909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-of-experts-fusion-for-fake-audio-detection-using-frozen-wav2vec-2-0-2409.11909"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-acoustic-scene-classification-using-teacher-informed-confusing-class-instruction-2409.11964</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-acoustic-scene-classification-using-teacher-informed-confusing-class-instruction-2409.11964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-acoustic-scene-classification-using-teacher-informed-confusing-class-instruction-2409.11964"/></url>
<url><loc>https://scifaro.com/en/abs/wmcodec-end-to-end-neural-speech-codec-with-deep-watermarking-for-authenticity-verification-2409.12121</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wmcodec-end-to-end-neural-speech-codec-with-deep-watermarking-for-authenticity-verification-2409.12121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wmcodec-end-to-end-neural-speech-codec-with-deep-watermarking-for-authenticity-verification-2409.12121"/></url>
<url><loc>https://scifaro.com/en/abs/takin-a-cohort-of-superior-quality-zero-shot-speech-generation-models-2409.12139</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/takin-a-cohort-of-superior-quality-zero-shot-speech-generation-models-2409.12139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/takin-a-cohort-of-superior-quality-zero-shot-speech-generation-models-2409.12139"/></url>
<url><loc>https://scifaro.com/en/abs/the-unreliability-of-acoustic-systems-in-alzheimer-s-speech-datasets-with-heterogeneous-recording-conditions-2409.12170</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-unreliability-of-acoustic-systems-in-alzheimer-s-speech-datasets-with-heterogeneous-recording-conditions-2409.12170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-unreliability-of-acoustic-systems-in-alzheimer-s-speech-datasets-with-heterogeneous-recording-conditions-2409.12170"/></url>
<url><loc>https://scifaro.com/en/abs/prosodic-parameter-manipulation-in-tts-generated-speech-for-controlled-speech-generation-2409.12176</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosodic-parameter-manipulation-in-tts-generated-speech-for-controlled-speech-generation-2409.12176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosodic-parameter-manipulation-in-tts-generated-speech-for-controlled-speech-generation-2409.12176"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-music-separation-and-generation-using-multi-track-latent-diffusion-models-2409.12346</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-music-separation-and-generation-using-multi-track-latent-diffusion-models-2409.12346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-music-separation-and-generation-using-multi-track-latent-diffusion-models-2409.12346"/></url>
<url><loc>https://scifaro.com/en/abs/channel-aware-domain-adaptive-generative-adversarial-network-for-robust-speech-recognition-2409.12386</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-aware-domain-adaptive-generative-adversarial-network-for-robust-speech-recognition-2409.12386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-aware-domain-adaptive-generative-adversarial-network-for-robust-speech-recognition-2409.12386"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-and-real-time-binaural-speech-enhancement-model-with-spatial-cues-preservation-2409.12444</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-and-real-time-binaural-speech-enhancement-model-with-spatial-cues-preservation-2409.12444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-and-real-time-binaural-speech-enhancement-model-with-spatial-cues-preservation-2409.12444"/></url>
<url><loc>https://scifaro.com/en/abs/audioeditor-a-training-free-diffusion-based-audio-editing-framework-2409.12466</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audioeditor-a-training-free-diffusion-based-audio-editing-framework-2409.12466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audioeditor-a-training-free-diffusion-based-audio-editing-framework-2409.12466"/></url>
<url><loc>https://scifaro.com/en/abs/violindiff-enhancing-expressive-violin-synthesis-with-pitch-bend-conditioning-2409.12477</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/violindiff-enhancing-expressive-violin-synthesis-with-pitch-bend-conditioning-2409.12477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/violindiff-enhancing-expressive-violin-synthesis-with-pitch-bend-conditioning-2409.12477"/></url>
<url><loc>https://scifaro.com/en/abs/soundbeam-meets-m2d-target-sound-extraction-with-audio-foundation-model-2409.12528</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundbeam-meets-m2d-target-sound-extraction-with-audio-foundation-model-2409.12528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundbeam-meets-m2d-target-sound-extraction-with-audio-foundation-model-2409.12528"/></url>
<url><loc>https://scifaro.com/en/abs/fruitsmusic-a-real-world-corpus-of-japanese-idol-group-songs-2409.12549</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fruitsmusic-a-real-world-corpus-of-japanese-idol-group-songs-2409.12549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fruitsmusic-a-real-world-corpus-of-japanese-idol-group-songs-2409.12549"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-bat-song-syllable-representations-in-self-supervised-audio-encoders-2409.12634</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-bat-song-syllable-representations-in-self-supervised-audio-encoders-2409.12634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-bat-song-syllable-representations-in-self-supervised-audio-encoders-2409.12634"/></url>
<url><loc>https://scifaro.com/en/abs/m6-gpt-3-generating-multitrack-modifiable-multi-minute-midi-music-from-text-using-genetic-algorithms-probabilistic-methods-and-gpt-models-in-any-progression-and-time-signature-2409.12638</loc><lastmod>2025-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m6-gpt-3-generating-multitrack-modifiable-multi-minute-midi-music-from-text-using-genetic-algorithms-probabilistic-methods-and-gpt-models-in-any-progression-and-time-signature-2409.12638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m6-gpt-3-generating-multitrack-modifiable-multi-minute-midi-music-from-text-using-genetic-algorithms-probabilistic-methods-and-gpt-models-in-any-progression-and-time-signature-2409.12638"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-synthetic-training-data-for-speech-commands-from-asr-based-filtering-to-domain-adaptation-in-ssl-latent-space-2409.12745</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-synthetic-training-data-for-speech-commands-from-asr-based-filtering-to-domain-adaptation-in-ssl-latent-space-2409.12745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-synthetic-training-data-for-speech-commands-from-asr-based-filtering-to-domain-adaptation-in-ssl-latent-space-2409.12745"/></url>
<url><loc>https://scifaro.com/en/abs/diffeditor-enhancing-speech-editing-with-semantic-enrichment-and-acoustic-consistency-2409.12992</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffeditor-enhancing-speech-editing-with-semantic-enrichment-and-acoustic-consistency-2409.12992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffeditor-enhancing-speech-editing-with-semantic-enrichment-and-acoustic-consistency-2409.12992"/></url>
<url><loc>https://scifaro.com/en/abs/mucodec-ultra-low-bitrate-music-codec-2409.13216</loc><lastmod>2025-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mucodec-ultra-low-bitrate-music-codec-2409.13216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mucodec-ultra-low-bitrate-music-codec-2409.13216"/></url>
<url><loc>https://scifaro.com/en/abs/audio-codec-augmentation-for-robust-collaborative-watermarking-of-speech-synthesis-2409.13382</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-codec-augmentation-for-robust-collaborative-watermarking-of-speech-synthesis-2409.13382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-codec-augmentation-for-robust-collaborative-watermarking-of-speech-synthesis-2409.13382"/></url>
<url><loc>https://scifaro.com/en/abs/diffsound-differentiable-modal-sound-rendering-and-inverse-rendering-for-diverse-inference-tasks-2409.13486</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsound-differentiable-modal-sound-rendering-and-inverse-rendering-for-diverse-inference-tasks-2409.13486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsound-differentiable-modal-sound-rendering-and-inverse-rendering-for-diverse-inference-tasks-2409.13486"/></url>
<url><loc>https://scifaro.com/en/abs/a-sound-description-exploring-prompt-templates-and-class-descriptions-to-enhance-zero-shot-audio-classification-2409.13676</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sound-description-exploring-prompt-templates-and-class-descriptions-to-enhance-zero-shot-audio-classification-2409.13676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sound-description-exploring-prompt-templates-and-class-descriptions-to-enhance-zero-shot-audio-classification-2409.13676"/></url>
<url><loc>https://scifaro.com/en/abs/a-microscopic-investigation-of-the-effect-of-random-envelope-fluctuations-on-phoneme-in-noise-perception-2409.13765</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-microscopic-investigation-of-the-effect-of-random-envelope-fluctuations-on-phoneme-in-noise-perception-2409.13765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-microscopic-investigation-of-the-effect-of-random-envelope-fluctuations-on-phoneme-in-noise-perception-2409.13765"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-knowledge-transfer-for-underwater-acoustic-classification-using-pre-trained-models-2409.13878</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-knowledge-transfer-for-underwater-acoustic-classification-using-pre-trained-models-2409.13878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-knowledge-transfer-for-underwater-acoustic-classification-using-pre-trained-models-2409.13878"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-time-frequency-feature-combinations-with-histogram-layer-time-delay-neural-networks-2409.13881</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-time-frequency-feature-combinations-with-histogram-layer-time-delay-neural-networks-2409.13881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-time-frequency-feature-combinations-with-histogram-layer-time-delay-neural-networks-2409.13881"/></url>
<url><loc>https://scifaro.com/en/abs/ptq4adm-post-training-quantization-for-efficient-text-conditional-audio-diffusion-models-2409.13894</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ptq4adm-post-training-quantization-for-efficient-text-conditional-audio-diffusion-models-2409.13894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ptq4adm-post-training-quantization-for-efficient-text-conditional-audio-diffusion-models-2409.13894"/></url>
<url><loc>https://scifaro.com/en/abs/training-large-asr-encoders-with-differential-privacy-2409.13953</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-large-asr-encoders-with-differential-privacy-2409.13953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-large-asr-encoders-with-differential-privacy-2409.13953"/></url>
<url><loc>https://scifaro.com/en/abs/echo-environmental-sound-classification-with-hierarchical-ontology-guided-semi-supervised-learning-2409.14043</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echo-environmental-sound-classification-with-hierarchical-ontology-guided-semi-supervised-learning-2409.14043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echo-environmental-sound-classification-with-hierarchical-ontology-guided-semi-supervised-learning-2409.14043"/></url>
<url><loc>https://scifaro.com/en/abs/amt-apc-automatic-piano-cover-by-fine-tuning-an-automatic-music-transcription-model-2409.14086</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amt-apc-automatic-piano-cover-by-fine-tuning-an-automatic-music-transcription-model-2409.14086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amt-apc-automatic-piano-cover-by-fine-tuning-an-automatic-music-transcription-model-2409.14086"/></url>
<url><loc>https://scifaro.com/en/abs/what-are-they-doing-joint-audio-speech-co-reasoning-2409.14526</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-are-they-doing-joint-audio-speech-co-reasoning-2409.14526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-are-they-doing-joint-audio-speech-co-reasoning-2409.14526"/></url>
<url><loc>https://scifaro.com/en/abs/songtrans-an-unified-song-transcription-and-alignment-method-for-lyrics-and-notes-2409.14619</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songtrans-an-unified-song-transcription-and-alignment-method-for-lyrics-and-notes-2409.14619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songtrans-an-unified-song-transcription-and-alignment-method-for-lyrics-and-notes-2409.14619"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-glot-high-fidelity-neural-formant-synthesis-with-differentiable-resonant-filters-2409.14823</loc><lastmod>2026-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-glot-high-fidelity-neural-formant-synthesis-with-differentiable-resonant-filters-2409.14823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-glot-high-fidelity-neural-formant-synthesis-with-differentiable-resonant-filters-2409.14823"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-based-privacy-through-adversarial-information-hiding-2409.14919</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-based-privacy-through-adversarial-information-hiding-2409.14919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-based-privacy-through-adversarial-information-hiding-2409.14919"/></url>
<url><loc>https://scifaro.com/en/abs/blind-spatial-impulse-response-generation-from-separate-room-and-scene-specific-information-2409.14971</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-spatial-impulse-response-generation-from-separate-room-and-scene-specific-information-2409.14971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-spatial-impulse-response-generation-from-separate-room-and-scene-specific-information-2409.14971"/></url>
<url><loc>https://scifaro.com/en/abs/gald-se-guided-anisotropic-lightweight-diffusion-for-efficient-speech-enhancement-2409.15101</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gald-se-guided-anisotropic-lightweight-diffusion-for-efficient-speech-enhancement-2409.15101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gald-se-guided-anisotropic-lightweight-diffusion-for-efficient-speech-enhancement-2409.15101"/></url>
<url><loc>https://scifaro.com/en/abs/lova-long-form-video-to-audio-generation-2409.15157</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lova-long-form-video-to-audio-generation-2409.15157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lova-long-form-video-to-audio-generation-2409.15157"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-learning-via-a-negative-selection-strategy-for-few-shot-bioacoustic-event-detection-2409.15168</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-learning-via-a-negative-selection-strategy-for-few-shot-bioacoustic-event-detection-2409.15168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-learning-via-a-negative-selection-strategy-for-few-shot-bioacoustic-event-detection-2409.15168"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-with-critical-analysis-for-deepfake-speech-detection-2409.15180</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-with-critical-analysis-for-deepfake-speech-detection-2409.15180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-with-critical-analysis-for-deepfake-speech-detection-2409.15180"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-learning-based-sound-propagation-for-virtual-and-real-world-audio-processing-applications-2409.15335</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-learning-based-sound-propagation-for-virtual-and-real-world-audio-processing-applications-2409.15335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-learning-based-sound-propagation-for-virtual-and-real-world-audio-processing-applications-2409.15335"/></url>
<url><loc>https://scifaro.com/en/abs/generalization-in-birdsong-classification-impact-of-transfer-learning-methods-and-dataset-characteristics-2409.15383</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalization-in-birdsong-classification-impact-of-transfer-learning-methods-and-dataset-characteristics-2409.15383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalization-in-birdsong-classification-impact-of-transfer-learning-methods-and-dataset-characteristics-2409.15383"/></url>
<url><loc>https://scifaro.com/en/abs/voiceguider-enhancing-out-of-domain-performance-in-parameter-efficient-speaker-adaptive-text-to-speech-via-autoguidance-2409.15759</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceguider-enhancing-out-of-domain-performance-in-parameter-efficient-speaker-adaptive-text-to-speech-via-autoguidance-2409.15759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceguider-enhancing-out-of-domain-performance-in-parameter-efficient-speaker-adaptive-text-to-speech-via-autoguidance-2409.15759"/></url>
<url><loc>https://scifaro.com/en/abs/nanovoice-efficient-speaker-adaptive-text-to-speech-for-multiple-speakers-2409.15760</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nanovoice-efficient-speaker-adaptive-text-to-speech-for-multiple-speakers-2409.15760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nanovoice-efficient-speaker-adaptive-text-to-speech-for-multiple-speakers-2409.15760"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-calibration-of-powerset-speaker-diarization-models-2409.15885</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-calibration-of-powerset-speaker-diarization-models-2409.15885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-calibration-of-powerset-speaker-diarization-models-2409.15885"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-code-switching-asr-with-mixture-of-experts-enhanced-speech-conditioned-llm-2409.15905</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-code-switching-asr-with-mixture-of-experts-enhanced-speech-conditioned-llm-2409.15905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-code-switching-asr-with-mixture-of-experts-enhanced-speech-conditioned-llm-2409.15905"/></url>
<url><loc>https://scifaro.com/en/abs/asd-diffusion-anomalous-sound-detection-with-diffusion-models-2409.15957</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asd-diffusion-anomalous-sound-detection-with-diffusion-models-2409.15957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asd-diffusion-anomalous-sound-detection-with-diffusion-models-2409.15957"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-age-and-identity-with-a-mutual-information-minimization-approach-for-cross-age-speaker-verification-2409.15974</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-age-and-identity-with-a-mutual-information-minimization-approach-for-cross-age-speaker-verification-2409.15974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-age-and-identity-with-a-mutual-information-minimization-approach-for-cross-age-speaker-verification-2409.15974"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-mixture-of-experts-for-improved-speech-deepfake-detection-2409.16077</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-mixture-of-experts-for-improved-speech-deepfake-detection-2409.16077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-mixture-of-experts-for-improved-speech-deepfake-detection-2409.16077"/></url>
<url><loc>https://scifaro.com/en/abs/facial-expression-enhanced-tts-combining-face-representation-and-emotion-intensity-for-adaptive-speech-2409.16203</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/facial-expression-enhanced-tts-combining-face-representation-and-emotion-intensity-for-adaptive-speech-2409.16203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/facial-expression-enhanced-tts-combining-face-representation-and-emotion-intensity-for-adaptive-speech-2409.16203"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-acoustic-features-for-robust-asr-2409.16399</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-acoustic-features-for-robust-asr-2409.16399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-acoustic-features-for-robust-asr-2409.16399"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-perceptual-metrics-on-music-representation-learning-for-genre-classification-2409.17069</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-perceptual-metrics-on-music-representation-learning-for-genre-classification-2409.17069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-perceptual-metrics-on-music-representation-learning-for-genre-classification-2409.17069"/></url>
<url><loc>https://scifaro.com/en/abs/spoofceleb-speech-deepfake-detection-and-sasv-in-the-wild-2409.17285</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofceleb-speech-deepfake-detection-and-sasv-in-the-wild-2409.17285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofceleb-speech-deepfake-detection-and-sasv-in-the-wild-2409.17285"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-polyglot-voices-by-leveraging-cross-lingual-fine-tuning-in-any-to-one-voice-conversion-2409.17387</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-polyglot-voices-by-leveraging-cross-lingual-fine-tuning-in-any-to-one-voice-conversion-2409.17387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-polyglot-voices-by-leveraging-cross-lingual-fine-tuning-in-any-to-one-voice-conversion-2409.17387"/></url>
<url><loc>https://scifaro.com/en/abs/freeze-and-learn-continual-learning-with-selective-freezing-for-speech-deepfake-detection-2409.17598</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freeze-and-learn-continual-learning-with-selective-freezing-for-speech-deepfake-detection-2409.17598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freeze-and-learn-continual-learning-with-selective-freezing-for-speech-deepfake-detection-2409.17598"/></url>
<url><loc>https://scifaro.com/en/abs/prototype-based-masked-audio-model-for-self-supervised-learning-of-sound-event-detection-2409.17656</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prototype-based-masked-audio-model-for-self-supervised-learning-of-sound-event-detection-2409.17656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prototype-based-masked-audio-model-for-self-supervised-learning-of-sound-event-detection-2409.17656"/></url>
<url><loc>https://scifaro.com/en/abs/a-fly-on-the-wall-exploiting-acoustic-side-channels-in-differential-pressure-sensors-2409.18213</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fly-on-the-wall-exploiting-acoustic-side-channels-in-differential-pressure-sensors-2409.18213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fly-on-the-wall-exploiting-acoustic-side-channels-in-differential-pressure-sensors-2409.18213"/></url>
<url><loc>https://scifaro.com/en/abs/towards-sub-millisecond-latency-real-time-speech-enhancement-models-on-hearables-2409.18239</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-sub-millisecond-latency-real-time-speech-enhancement-models-on-hearables-2409.18239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-sub-millisecond-latency-real-time-speech-enhancement-models-on-hearables-2409.18239"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-prompting-improving-emotion-intensity-and-speaker-consistency-in-zero-shot-tts-2409.18512</loc><lastmod>2026-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-prompting-improving-emotion-intensity-and-speaker-consistency-in-zero-shot-tts-2409.18512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-prompting-improving-emotion-intensity-and-speaker-consistency-in-zero-shot-tts-2409.18512"/></url>
<url><loc>https://scifaro.com/en/abs/xwsb-a-blend-system-utilizing-xls-r-and-wavlm-with-sls-classifier-detection-system-for-svdd-2024-challenge-2409.18558</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xwsb-a-blend-system-utilizing-xls-r-and-wavlm-with-sls-classifier-detection-system-for-svdd-2024-challenge-2409.18558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xwsb-a-blend-system-utilizing-xls-r-and-wavlm-with-sls-classifier-detection-system-for-svdd-2024-challenge-2409.18558"/></url>
<url><loc>https://scifaro.com/en/abs/childmandarin-a-comprehensive-mandarin-speech-dataset-for-young-children-aged-3-5-2409.18584</loc><lastmod>2025-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/childmandarin-a-comprehensive-mandarin-speech-dataset-for-young-children-aged-3-5-2409.18584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/childmandarin-a-comprehensive-mandarin-speech-dataset-for-young-children-aged-3-5-2409.18584"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-linguistic-feature-extraction-for-enhancing-multi-lingual-and-low-resource-text-to-speech-2409.18622</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-linguistic-feature-extraction-for-enhancing-multi-lingual-and-low-resource-text-to-speech-2409.18622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-linguistic-feature-extraction-for-enhancing-multi-lingual-and-low-resource-text-to-speech-2409.18622"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-single-audio-advancing-multi-audio-processing-in-audio-large-language-models-2409.18680</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-single-audio-advancing-multi-audio-processing-in-audio-large-language-models-2409.18680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-single-audio-advancing-multi-audio-processing-in-audio-large-language-models-2409.18680"/></url>
<url><loc>https://scifaro.com/en/abs/opensep-leveraging-large-language-models-with-textual-inversion-for-open-world-audio-separation-2409.19270</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/opensep-leveraging-large-language-models-with-textual-inversion-for-open-world-audio-separation-2409.19270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/opensep-leveraging-large-language-models-with-textual-inversion-for-open-world-audio-separation-2409.19270"/></url>
<url><loc>https://scifaro.com/en/abs/sustaining-model-performance-for-covid-19-detection-from-dynamic-audio-data-development-and-evaluation-of-a-comprehensive-drift-adaptive-framework-2409.19300</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sustaining-model-performance-for-covid-19-detection-from-dynamic-audio-data-development-and-evaluation-of-a-comprehensive-drift-adaptive-framework-2409.19300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sustaining-model-performance-for-covid-19-detection-from-dynamic-audio-data-development-and-evaluation-of-a-comprehensive-drift-adaptive-framework-2409.19300"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-clustering-techniques-for-speech-signal-enhancement-a-review-and-metanalysis-of-fuzzy-c-means-k-means-and-kernel-fuzzy-c-means-methods-2409.19448</loc><lastmod>2025-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-clustering-techniques-for-speech-signal-enhancement-a-review-and-metanalysis-of-fuzzy-c-means-k-means-and-kernel-fuzzy-c-means-methods-2409.19448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-clustering-techniques-for-speech-signal-enhancement-a-review-and-metanalysis-of-fuzzy-c-means-k-means-and-kernel-fuzzy-c-means-methods-2409.19448"/></url>
<url><loc>https://scifaro.com/en/abs/learning-frame-wise-emotion-intensity-for-audio-driven-talking-head-generation-2409.19501</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-frame-wise-emotion-intensity-for-audio-driven-talking-head-generation-2409.19501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-frame-wise-emotion-intensity-for-audio-driven-talking-head-generation-2409.19501"/></url>
<url><loc>https://scifaro.com/en/abs/quantitative-analysis-of-audio-visual-tasks-an-information-theoretic-perspective-2409.19575</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantitative-analysis-of-audio-visual-tasks-an-information-theoretic-perspective-2409.19575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantitative-analysis-of-audio-visual-tasks-an-information-theoretic-perspective-2409.19575"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-framework-for-robust-speech-emotion-recognition-using-target-speaker-extraction-in-human-speech-noise-conditions-2409.19585</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-framework-for-robust-speech-emotion-recognition-using-target-speaker-extraction-in-human-speech-noise-conditions-2409.19585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-framework-for-robust-speech-emotion-recognition-using-target-speaker-extraction-in-human-speech-noise-conditions-2409.19585"/></url>
<url><loc>https://scifaro.com/en/abs/solution-for-temporal-sound-localisation-task-of-eccv-second-perception-test-challenge-2024-2409.19595</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/solution-for-temporal-sound-localisation-task-of-eccv-second-perception-test-challenge-2024-2409.19595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/solution-for-temporal-sound-localisation-task-of-eccv-second-perception-test-challenge-2024-2409.19595"/></url>
<url><loc>https://scifaro.com/en/abs/improved-architecture-for-high-resolution-piano-transcription-to-efficiently-capture-acoustic-characteristics-of-music-signals-2409.19614</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-architecture-for-high-resolution-piano-transcription-to-efficiently-capture-acoustic-characteristics-of-music-signals-2409.19614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-architecture-for-high-resolution-piano-transcription-to-efficiently-capture-acoustic-characteristics-of-music-signals-2409.19614"/></url>
<url><loc>https://scifaro.com/en/abs/infantcrynet-a-data-driven-framework-for-intelligent-analysis-of-infant-cries-2409.19689</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infantcrynet-a-data-driven-framework-for-intelligent-analysis-of-infant-cries-2409.19689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infantcrynet-a-data-driven-framework-for-intelligent-analysis-of-infant-cries-2409.19689"/></url>
<url><loc>https://scifaro.com/en/abs/palm-few-shot-prompt-learning-for-audio-language-models-2409.19806</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/palm-few-shot-prompt-learning-for-audio-language-models-2409.19806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/palm-few-shot-prompt-learning-for-audio-language-models-2409.19806"/></url>
<url><loc>https://scifaro.com/en/abs/hdmole-mixture-of-lora-experts-with-hierarchical-routing-and-dynamic-thresholds-for-fine-tuning-llm-based-asr-models-2409.19878</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hdmole-mixture-of-lora-experts-with-hierarchical-routing-and-dynamic-thresholds-for-fine-tuning-llm-based-asr-models-2409.19878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hdmole-mixture-of-lora-experts-with-hierarchical-routing-and-dynamic-thresholds-for-fine-tuning-llm-based-asr-models-2409.19878"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-high-precision-sound-source-localization-at-low-frequencies-based-on-convolutional-neural-network-2409.20031</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-high-precision-sound-source-localization-at-low-frequencies-based-on-convolutional-neural-network-2409.20031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-high-precision-sound-source-localization-at-low-frequencies-based-on-convolutional-neural-network-2409.20031"/></url>
<url><loc>https://scifaro.com/en/abs/melody-guided-music-generation-2409.20196</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melody-guided-music-generation-2409.20196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melody-guided-music-generation-2409.20196"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-piano-performance-midi-to-score-conversion-with-transformers-2410.00210</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-piano-performance-midi-to-score-conversion-with-transformers-2410.00210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-piano-performance-midi-to-score-conversion-with-transformers-2410.00210"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-text-to-music-models-with-language-models-composing-long-structured-music-pieces-2410.00344</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-text-to-music-models-with-language-models-composing-long-structured-music-pieces-2410.00344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-text-to-music-models-with-language-models-composing-long-structured-music-pieces-2410.00344"/></url>
<url><loc>https://scifaro.com/en/abs/contribution-of-soundscape-appropriateness-to-soundscape-quality-assessment-in-space-a-mediating-variable-affecting-acoustic-comfort-2410.00667</loc><lastmod>2024-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contribution-of-soundscape-appropriateness-to-soundscape-quality-assessment-in-space-a-mediating-variable-affecting-acoustic-comfort-2410.00667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contribution-of-soundscape-appropriateness-to-soundscape-quality-assessment-in-space-a-mediating-variable-affecting-acoustic-comfort-2410.00667"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-text-to-speech-from-continuous-text-streams-2410.00767</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-from-continuous-text-streams-2410.00767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-from-continuous-text-streams-2410.00767"/></url>
<url><loc>https://scifaro.com/en/abs/improving-curriculum-learning-for-target-speaker-extraction-with-synthetic-speakers-2410.00811</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-curriculum-learning-for-target-speaker-extraction-with-synthetic-speakers-2410.00811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-curriculum-learning-for-target-speaker-extraction-with-synthetic-speakers-2410.00811"/></url>
<url><loc>https://scifaro.com/en/abs/vhasr-a-multimodal-speech-recognition-system-with-vision-hotwords-2410.00822</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vhasr-a-multimodal-speech-recognition-system-with-vision-hotwords-2410.00822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vhasr-a-multimodal-speech-recognition-system-with-vision-hotwords-2410.00822"/></url>
<url><loc>https://scifaro.com/en/abs/do-music-generation-models-encode-music-theory-2410.00872</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-music-generation-models-encode-music-theory-2410.00872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-music-generation-models-encode-music-theory-2410.00872"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-sound-classification-with-the-broad-sound-taxonomy-and-dataset-2410.00980</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-sound-classification-with-the-broad-sound-taxonomy-and-dataset-2410.00980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-sound-classification-with-the-broad-sound-taxonomy-and-dataset-2410.00980"/></url>
<url><loc>https://scifaro.com/en/abs/takin-vc-expressive-zero-shot-voice-conversion-via-adaptive-hybrid-content-encoding-and-enhanced-timbre-modeling-2410.01350</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/takin-vc-expressive-zero-shot-voice-conversion-via-adaptive-hybrid-content-encoding-and-enhanced-timbre-modeling-2410.01350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/takin-vc-expressive-zero-shot-voice-conversion-via-adaptive-hybrid-content-encoding-and-enhanced-timbre-modeling-2410.01350"/></url>
<url><loc>https://scifaro.com/en/abs/tiger-time-frequency-interleaved-gain-extraction-and-reconstruction-for-efficient-speech-separation-2410.01469</loc><lastmod>2026-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiger-time-frequency-interleaved-gain-extraction-and-reconstruction-for-efficient-speech-separation-2410.01469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiger-time-frequency-interleaved-gain-extraction-and-reconstruction-for-efficient-speech-separation-2410.01469"/></url>
<url><loc>https://scifaro.com/en/abs/sonicsim-a-customizable-simulation-platform-for-speech-processing-in-moving-sound-source-scenarios-2410.01481</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonicsim-a-customizable-simulation-platform-for-speech-processing-in-moving-sound-source-scenarios-2410.01481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonicsim-a-customizable-simulation-platform-for-speech-processing-in-moving-sound-source-scenarios-2410.01481"/></url>
<url><loc>https://scifaro.com/en/abs/pertok-expressive-encoding-and-modeling-of-symbolic-musical-ideas-and-variations-2410.02060</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pertok-expressive-encoding-and-modeling-of-symbolic-musical-ideas-and-variations-2410.02060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pertok-expressive-encoding-and-modeling-of-symbolic-musical-ideas-and-variations-2410.02060"/></url>
<url><loc>https://scifaro.com/en/abs/generating-symbolic-music-from-natural-language-prompts-using-an-llm-enhanced-dataset-2410.02084</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-symbolic-music-from-natural-language-prompts-using-an-llm-enhanced-dataset-2410.02084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-symbolic-music-from-natural-language-prompts-using-an-llm-enhanced-dataset-2410.02084"/></url>
<url><loc>https://scifaro.com/en/abs/mdsgen-fast-and-efficient-masked-diffusion-temporal-aware-transformers-for-open-domain-sound-generation-2410.02130</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdsgen-fast-and-efficient-masked-diffusion-temporal-aware-transformers-for-open-domain-sound-generation-2410.02130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdsgen-fast-and-efficient-masked-diffusion-temporal-aware-transformers-for-open-domain-sound-generation-2410.02130"/></url>
<url><loc>https://scifaro.com/en/abs/soundmorpher-perceptually-uniform-sound-morphing-with-diffusion-model-2410.02144</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundmorpher-perceptually-uniform-sound-morphing-with-diffusion-model-2410.02144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundmorpher-perceptually-uniform-sound-morphing-with-diffusion-model-2410.02144"/></url>
<url><loc>https://scifaro.com/en/abs/a-pilot-study-of-applying-sequence-to-sequence-voice-conversion-to-evaluate-the-intelligibility-of-l2-speech-using-a-native-speaker-s-shadowings-2410.02239</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pilot-study-of-applying-sequence-to-sequence-voice-conversion-to-evaluate-the-intelligibility-of-l2-speech-using-a-native-speaker-s-shadowings-2410.02239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pilot-study-of-applying-sequence-to-sequence-voice-conversion-to-evaluate-the-intelligibility-of-l2-speech-using-a-native-speaker-s-shadowings-2410.02239"/></url>
<url><loc>https://scifaro.com/en/abs/collap-contrastive-long-form-language-audio-pretraining-with-musical-temporal-structure-augmentation-2410.02271</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collap-contrastive-long-form-language-audio-pretraining-with-musical-temporal-structure-augmentation-2410.02271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collap-contrastive-long-form-language-audio-pretraining-with-musical-temporal-structure-augmentation-2410.02271"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-variational-autoencoders-for-spectrogram-compression-in-automatic-speech-recognition-2410.02560</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-variational-autoencoders-for-spectrogram-compression-in-automatic-speech-recognition-2410.02560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-variational-autoencoders-for-spectrogram-compression-in-automatic-speech-recognition-2410.02560"/></url>
<url><loc>https://scifaro.com/en/abs/enriching-music-descriptions-with-a-finetuned-llm-and-metadata-for-text-to-music-retrieval-2410.03264</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enriching-music-descriptions-with-a-finetuned-llm-and-metadata-for-text-to-music-retrieval-2410.03264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enriching-music-descriptions-with-a-finetuned-llm-and-metadata-for-text-to-music-retrieval-2410.03264"/></url>
<url><loc>https://scifaro.com/en/abs/audio-agent-leveraging-llms-for-audio-generation-editing-and-composition-2410.03335</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-agent-leveraging-llms-for-audio-generation-editing-and-composition-2410.03335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-agent-leveraging-llms-for-audio-generation-editing-and-composition-2410.03335"/></url>
<url><loc>https://scifaro.com/en/abs/soundsignature-what-type-of-music-do-you-like-2410.03375</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundsignature-what-type-of-music-do-you-like-2410.03375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundsignature-what-type-of-music-do-you-like-2410.03375"/></url>
<url><loc>https://scifaro.com/en/abs/biodenoising-animal-vocalization-denoising-without-access-to-clean-data-2410.03427</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/biodenoising-animal-vocalization-denoising-without-access-to-clean-data-2410.03427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/biodenoising-animal-vocalization-denoising-without-access-to-clean-data-2410.03427"/></url>
<url><loc>https://scifaro.com/en/abs/generative-semantic-communication-for-text-to-speech-synthesis-2410.03459</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-semantic-communication-for-text-to-speech-synthesis-2410.03459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-semantic-communication-for-text-to-speech-synthesis-2410.03459"/></url>
<url><loc>https://scifaro.com/en/abs/a-quest-through-interconnected-datasets-lessons-from-highly-cited-icassp-papers-2410.03676</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-quest-through-interconnected-datasets-lessons-from-highly-cited-icassp-papers-2410.03676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-quest-through-interconnected-datasets-lessons-from-highly-cited-icassp-papers-2410.03676"/></url>
<url><loc>https://scifaro.com/en/abs/accent-conversion-using-discrete-units-with-parallel-data-synthesized-from-controllable-accented-tts-2410.03734</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-conversion-using-discrete-units-with-parallel-data-synthesized-from-controllable-accented-tts-2410.03734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-conversion-using-discrete-units-with-parallel-data-synthesized-from-controllable-accented-tts-2410.03734"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-streaming-llm-for-speech-recognition-2410.03752</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-streaming-llm-for-speech-recognition-2410.03752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-streaming-llm-for-speech-recognition-2410.03752"/></url>
<url><loc>https://scifaro.com/en/abs/sonique-video-background-music-generation-using-unpaired-audio-visual-data-2410.03879</loc><lastmod>2025-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonique-video-background-music-generation-using-unpaired-audio-visual-data-2410.03879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonique-video-background-music-generation-using-unpaired-audio-visual-data-2410.03879"/></url>
<url><loc>https://scifaro.com/en/abs/did-you-hear-that-introducing-aadg-a-framework-for-generating-benchmark-data-in-audio-anomaly-detection-2410.03904</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/did-you-hear-that-introducing-aadg-a-framework-for-generating-benchmark-data-in-audio-anomaly-detection-2410.03904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/did-you-hear-that-introducing-aadg-a-framework-for-generating-benchmark-data-in-audio-anomaly-detection-2410.03904"/></url>
<url><loc>https://scifaro.com/en/abs/the-ocon-model-an-old-but-green-solution-for-distributable-supervised-classification-for-acoustic-monitoring-in-smart-cities-2410.04098</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ocon-model-an-old-but-green-solution-for-distributable-supervised-classification-for-acoustic-monitoring-in-smart-cities-2410.04098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ocon-model-an-old-but-green-solution-for-distributable-supervised-classification-for-acoustic-monitoring-in-smart-cities-2410.04098"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-and-robust-long-form-speech-recognition-with-hybrid-h3-conformer-2410.04159</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-and-robust-long-form-speech-recognition-with-hybrid-h3-conformer-2410.04159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-and-robust-long-form-speech-recognition-with-hybrid-h3-conformer-2410.04159"/></url>
<url><loc>https://scifaro.com/en/abs/where-are-we-in-audio-deepfake-detection-a-systematic-analysis-over-generative-and-detection-models-2410.04324</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/where-are-we-in-audio-deepfake-detection-a-systematic-analysis-over-generative-and-detection-models-2410.04324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/where-are-we-in-audio-deepfake-detection-a-systematic-analysis-over-generative-and-detection-models-2410.04324"/></url>
<url><loc>https://scifaro.com/en/abs/configurable-multilingual-asr-with-speech-summary-representations-2410.04478</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/configurable-multilingual-asr-with-speech-summary-representations-2410.04478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/configurable-multilingual-asr-with-speech-summary-representations-2410.04478"/></url>
<url><loc>https://scifaro.com/en/abs/unimumo-unified-text-music-and-motion-generation-2410.04534</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unimumo-unified-text-music-and-motion-generation-2410.04534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unimumo-unified-text-music-and-motion-generation-2410.04534"/></url>
<url><loc>https://scifaro.com/en/abs/demo-of-zero-shot-guitar-amplifier-modelling-enhancing-modeling-with-hyper-neural-networks-2410.04702</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/demo-of-zero-shot-guitar-amplifier-modelling-enhancing-modeling-with-hyper-neural-networks-2410.04702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/demo-of-zero-shot-guitar-amplifier-modelling-enhancing-modeling-with-hyper-neural-networks-2410.04702"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-and-estimation-of-vocal-tract-and-glottal-source-parameters-using-armax-lf-model-2410.04704</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-and-estimation-of-vocal-tract-and-glottal-source-parameters-using-armax-lf-model-2410.04704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-and-estimation-of-vocal-tract-and-glottal-source-parameters-using-armax-lf-model-2410.04704"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-based-multi-level-feature-fusion-for-voice-disorder-diagnosis-2410.04797</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-based-multi-level-feature-fusion-for-voice-disorder-diagnosis-2410.04797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-based-multi-level-feature-fusion-for-voice-disorder-diagnosis-2410.04797"/></url>
<url><loc>https://scifaro.com/en/abs/stage-wise-and-prior-aware-neural-speech-phase-prediction-2410.04990</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stage-wise-and-prior-aware-neural-speech-phase-prediction-2410.04990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stage-wise-and-prior-aware-neural-speech-phase-prediction-2410.04990"/></url>
<url><loc>https://scifaro.com/en/abs/relunet-relative-channel-fusion-u-net-for-multichannel-speech-enhancement-2410.05019</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relunet-relative-channel-fusion-u-net-for-multichannel-speech-enhancement-2410.05019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relunet-relative-channel-fusion-u-net-for-multichannel-speech-enhancement-2410.05019"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-representations-using-contrastive-losses-on-multi-scale-features-2410.05037</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-representations-using-contrastive-losses-on-multi-scale-features-2410.05037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-representations-using-contrastive-losses-on-multi-scale-features-2410.05037"/></url>
<url><loc>https://scifaro.com/en/abs/presto-distilling-steps-and-layers-for-accelerating-music-generation-2410.05167</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/presto-distilling-steps-and-layers-for-accelerating-music-generation-2410.05167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/presto-distilling-steps-and-layers-for-accelerating-music-generation-2410.05167"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-unsupervised-audio-visual-speech-enhancement-2410.05301</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-unsupervised-audio-visual-speech-enhancement-2410.05301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-unsupervised-audio-visual-speech-enhancement-2410.05301"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-talker-identity-aids-with-improving-speech-recognition-in-adversarial-environments-2410.05423</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-talker-identity-aids-with-improving-speech-recognition-in-adversarial-environments-2410.05423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-talker-identity-aids-with-improving-speech-recognition-in-adversarial-environments-2410.05423"/></url>
<url><loc>https://scifaro.com/en/abs/fgcl-fine-grained-contrastive-learning-for-mandarin-stuttering-event-detection-2410.05647</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fgcl-fine-grained-contrastive-learning-for-mandarin-stuttering-event-detection-2410.05647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fgcl-fine-grained-contrastive-learning-for-mandarin-stuttering-event-detection-2410.05647"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-channel-speaker-extraction-and-binaural-speech-synthesis-2410.05739</loc><lastmod>2025-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-channel-speaker-extraction-and-binaural-speech-synthesis-2410.05739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-channel-speaker-extraction-and-binaural-speech-synthesis-2410.05739"/></url>
<url><loc>https://scifaro.com/en/abs/finally-fast-and-universal-speech-enhancement-with-studio-like-quality-2410.05920</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/finally-fast-and-universal-speech-enhancement-with-studio-like-quality-2410.05920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/finally-fast-and-universal-speech-enhancement-with-studio-like-quality-2410.05920"/></url>
<url><loc>https://scifaro.com/en/abs/variable-bitrate-residual-vector-quantization-for-audio-coding-2410.06016</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variable-bitrate-residual-vector-quantization-for-audio-coding-2410.06016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variable-bitrate-residual-vector-quantization-for-audio-coding-2410.06016"/></url>
<url><loc>https://scifaro.com/en/abs/poliphone-a-dataset-for-smartphone-model-identification-from-audio-recordings-2410.06221</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/poliphone-a-dataset-for-smartphone-model-identification-from-audio-recordings-2410.06221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/poliphone-a-dataset-for-smartphone-model-identification-from-audio-recordings-2410.06221"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-based-segmentation-model-for-speaker-diarization-2410.06459</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-based-segmentation-model-for-speaker-diarization-2410.06459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-based-segmentation-model-for-speaker-diarization-2410.06459"/></url>
<url><loc>https://scifaro.com/en/abs/src-gaudio-sampling-rate-controlled-audio-generation-2410.06544</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/src-gaudio-sampling-rate-controlled-audio-generation-2410.06544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/src-gaudio-sampling-rate-controlled-audio-generation-2410.06544"/></url>
<url><loc>https://scifaro.com/en/abs/can-deepfake-speech-be-reliably-detected-2410.06572</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-deepfake-speech-be-reliably-detected-2410.06572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-deepfake-speech-be-reliably-detected-2410.06572"/></url>
<url><loc>https://scifaro.com/en/abs/bahasa-harmony-a-comprehensive-dataset-for-bahasa-text-to-speech-synthesis-with-discrete-codec-modeling-of-engen-tts-2410.06608</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bahasa-harmony-a-comprehensive-dataset-for-bahasa-text-to-speech-synthesis-with-discrete-codec-modeling-of-engen-tts-2410.06608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bahasa-harmony-a-comprehensive-dataset-for-bahasa-text-to-speech-synthesis-with-discrete-codec-modeling-of-engen-tts-2410.06608"/></url>
<url><loc>https://scifaro.com/en/abs/scoreq-speech-quality-assessment-with-contrastive-regression-2410.06675</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scoreq-speech-quality-assessment-with-contrastive-regression-2410.06675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scoreq-speech-quality-assessment-with-contrastive-regression-2410.06675"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-and-rhythm-features-for-audio-classification-with-deep-convolutional-neural-networks-2410.06927</loc><lastmod>2025-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-and-rhythm-features-for-audio-classification-with-deep-convolutional-neural-networks-2410.06927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-and-rhythm-features-for-audio-classification-with-deep-convolutional-neural-networks-2410.06927"/></url>
<url><loc>https://scifaro.com/en/abs/audio-explanation-synthesis-with-generative-foundation-models-2410.07530</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-explanation-synthesis-with-generative-foundation-models-2410.07530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-explanation-synthesis-with-generative-foundation-models-2410.07530"/></url>
<url><loc>https://scifaro.com/en/abs/full-rank-no-more-low-rank-weight-training-for-modern-speech-recognition-models-2410.07771</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-rank-no-more-low-rank-weight-training-for-modern-speech-recognition-models-2410.07771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-rank-no-more-low-rank-weight-training-for-modern-speech-recognition-models-2410.07771"/></url>
<url><loc>https://scifaro.com/en/abs/intrinsicvoice-empowering-llms-with-intrinsic-real-time-voice-interaction-abilities-2410.08035</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intrinsicvoice-empowering-llms-with-intrinsic-real-time-voice-interaction-abilities-2410.08035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intrinsicvoice-empowering-llms-with-intrinsic-real-time-voice-interaction-abilities-2410.08035"/></url>
<url><loc>https://scifaro.com/en/abs/a-recurrent-neural-network-approach-to-the-answering-machine-detection-problem-2410.08235</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-recurrent-neural-network-approach-to-the-answering-machine-detection-problem-2410.08235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-recurrent-neural-network-approach-to-the-answering-machine-detection-problem-2410.08235"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-using-large-language-models-2410.08321</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-using-large-language-models-2410.08321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-using-large-language-models-2410.08321"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-fine-grained-guidance-for-diffusion-model-based-symbolic-music-generation-2410.08435</loc><lastmod>2025-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-fine-grained-guidance-for-diffusion-model-based-symbolic-music-generation-2410.08435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-fine-grained-guidance-for-diffusion-model-based-symbolic-music-generation-2410.08435"/></url>
<url><loc>https://scifaro.com/en/abs/small-tunes-transformer-exploring-macro-micro-level-hierarchies-for-skeleton-conditioned-melody-generation-2410.08626</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-tunes-transformer-exploring-macro-micro-level-hierarchies-for-skeleton-conditioned-melody-generation-2410.08626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-tunes-transformer-exploring-macro-micro-level-hierarchies-for-skeleton-conditioned-melody-generation-2410.08626"/></url>
<url><loc>https://scifaro.com/en/abs/quantum-trained-convolutional-neural-network-for-deepfake-audio-detection-2410.09250</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantum-trained-convolutional-neural-network-for-deepfake-audio-detection-2410.09250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantum-trained-convolutional-neural-network-for-deepfake-audio-detection-2410.09250"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-audio-based-disease-prediction-with-transformer-based-hierarchical-fusion-network-2410.09289</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-audio-based-disease-prediction-with-transformer-based-hierarchical-fusion-network-2410.09289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-audio-based-disease-prediction-with-transformer-based-hierarchical-fusion-network-2410.09289"/></url>
<url><loc>https://scifaro.com/en/abs/towards-the-synthesis-of-non-speech-vocalizations-2410.09360</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-the-synthesis-of-non-speech-vocalizations-2410.09360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-the-synthesis-of-non-speech-vocalizations-2410.09360"/></url>
<url><loc>https://scifaro.com/en/abs/expgest-expressive-speaker-generation-using-diffusion-model-and-hybrid-audio-text-guidance-2410.09396</loc><lastmod>2026-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expgest-expressive-speaker-generation-using-diffusion-model-and-hybrid-audio-text-guidance-2410.09396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expgest-expressive-speaker-generation-using-diffusion-model-and-hybrid-audio-text-guidance-2410.09396"/></url>
<url><loc>https://scifaro.com/en/abs/drcap-decoding-clap-latents-with-retrieval-augmented-generation-for-zero-shot-audio-captioning-2410.09472</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drcap-decoding-clap-latents-with-retrieval-augmented-generation-for-zero-shot-audio-captioning-2410.09472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drcap-decoding-clap-latents-with-retrieval-augmented-generation-for-zero-shot-audio-captioning-2410.09472"/></url>
<url><loc>https://scifaro.com/en/abs/objective-measurements-of-voice-quality-2410.09578</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-measurements-of-voice-quality-2410.09578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-measurements-of-voice-quality-2410.09578"/></url>
<url><loc>https://scifaro.com/en/abs/lead-dataset-how-can-labels-for-sound-event-detection-vary-depending-on-annotators-2410.09778</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lead-dataset-how-can-labels-for-sound-event-detection-vary-depending-on-annotators-2410.09778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lead-dataset-how-can-labels-for-sound-event-detection-vary-depending-on-annotators-2410.09778"/></url>
<url><loc>https://scifaro.com/en/abs/prompt-tuning-for-audio-deepfake-detection-computationally-efficient-test-time-domain-adaptation-with-limited-target-dataset-2410.09869</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompt-tuning-for-audio-deepfake-detection-computationally-efficient-test-time-domain-adaptation-with-limited-target-dataset-2410.09869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompt-tuning-for-audio-deepfake-detection-computationally-efficient-test-time-domain-adaptation-with-limited-target-dataset-2410.09869"/></url>
<url><loc>https://scifaro.com/en/abs/m2m-gen-a-multimodal-framework-for-automated-background-music-generation-in-japanese-manga-using-large-language-models-2410.09928</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2m-gen-a-multimodal-framework-for-automated-background-music-generation-in-japanese-manga-using-large-language-models-2410.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2m-gen-a-multimodal-framework-for-automated-background-music-generation-in-japanese-manga-using-large-language-models-2410.09928"/></url>
<url><loc>https://scifaro.com/en/abs/generative-deep-learning-and-signal-processing-for-data-augmentation-of-cardiac-auscultation-signals-improving-model-robustness-using-synthetic-audio-2410.10125</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-deep-learning-and-signal-processing-for-data-augmentation-of-cardiac-auscultation-signals-improving-model-robustness-using-synthetic-audio-2410.10125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-deep-learning-and-signal-processing-for-data-augmentation-of-cardiac-auscultation-signals-improving-model-robustness-using-synthetic-audio-2410.10125"/></url>
<url><loc>https://scifaro.com/en/abs/do-we-need-more-complex-representations-for-structure-a-comparison-of-note-duration-representation-for-music-transformers-2410.10515</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-we-need-more-complex-representations-for-structure-a-comparison-of-note-duration-representation-for-music-transformers-2410.10515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-we-need-more-complex-representations-for-structure-a-comparison-of-note-duration-representation-for-music-transformers-2410.10515"/></url>
<url><loc>https://scifaro.com/en/abs/reproducible-machine-learning-based-voice-pathology-detection-introducing-the-pitch-difference-feature-2410.10537</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reproducible-machine-learning-based-voice-pathology-detection-introducing-the-pitch-difference-feature-2410.10537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reproducible-machine-learning-based-voice-pathology-detection-introducing-the-pitch-difference-feature-2410.10537"/></url>
<url><loc>https://scifaro.com/en/abs/both-ears-wide-open-towards-language-driven-spatial-audio-generation-2410.10676</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/both-ears-wide-open-towards-language-driven-spatial-audio-generation-2410.10676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/both-ears-wide-open-towards-language-driven-spatial-audio-generation-2410.10676"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-retrieval-augmented-audio-captioning-with-generation-assisted-multimodal-querying-and-progressive-learning-2410.10913</loc><lastmod>2025-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-retrieval-augmented-audio-captioning-with-generation-assisted-multimodal-querying-and-progressive-learning-2410.10913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-retrieval-augmented-audio-captioning-with-generation-assisted-multimodal-querying-and-progressive-learning-2410.10913"/></url>
<url><loc>https://scifaro.com/en/abs/grafprint-a-gnn-based-approach-for-audio-identification-2410.10994</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/grafprint-a-gnn-based-approach-for-audio-identification-2410.10994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/grafprint-a-gnn-based-approach-for-audio-identification-2410.10994"/></url>
<url><loc>https://scifaro.com/en/abs/cleanumamba-a-compact-mamba-network-for-speech-denoising-using-channel-pruning-2410.11062</loc><lastmod>2025-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cleanumamba-a-compact-mamba-network-for-speech-denoising-using-channel-pruning-2410.11062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cleanumamba-a-compact-mamba-network-for-speech-denoising-using-channel-pruning-2410.11062"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-kinship-verification-using-age-domain-conversion-2410.11120</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-kinship-verification-using-age-domain-conversion-2410.11120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-kinship-verification-using-age-domain-conversion-2410.11120"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-speaker-representation-for-target-speaker-speech-processing-2410.11243</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-speaker-representation-for-target-speaker-speech-processing-2410.11243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-speaker-representation-for-target-speaker-speech-processing-2410.11243"/></url>
<url><loc>https://scifaro.com/en/abs/diff-sage-end-to-end-spatial-audio-generation-using-diffusion-models-2410.11299</loc><lastmod>2025-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-sage-end-to-end-spatial-audio-generation-using-diffusion-models-2410.11299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-sage-end-to-end-spatial-audio-generation-using-diffusion-models-2410.11299"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-llm-embeddings-for-cross-dataset-label-alignment-and-zero-shot-music-emotion-prediction-2410.11522</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-llm-embeddings-for-cross-dataset-label-alignment-and-zero-shot-music-emotion-prediction-2410.11522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-llm-embeddings-for-cross-dataset-label-alignment-and-zero-shot-music-emotion-prediction-2410.11522"/></url>
<url><loc>https://scifaro.com/en/abs/emotioncaps-enhancing-audio-captioning-through-emotion-augmented-data-generation-2410.12028</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotioncaps-enhancing-audio-captioning-through-emotion-augmented-data-generation-2410.12028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotioncaps-enhancing-audio-captioning-through-emotion-augmented-data-generation-2410.12028"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-rumble-automated-elephant-call-classification-detection-and-endpointing-using-deep-architectures-2410.12082</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-rumble-automated-elephant-call-classification-detection-and-endpointing-using-deep-architectures-2410.12082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-rumble-automated-elephant-call-classification-detection-and-endpointing-using-deep-architectures-2410.12082"/></url>
<url><loc>https://scifaro.com/en/abs/sf-speech-straightened-flow-for-zero-shot-voice-clone-2410.12399</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sf-speech-straightened-flow-for-zero-shot-voice-clone-2410.12399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sf-speech-straightened-flow-for-zero-shot-voice-clone-2410.12399"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-segmental-average-pooling-of-self-supervised-learning-features-2410.12416</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-segmental-average-pooling-of-self-supervised-learning-features-2410.12416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-emotion-recognition-through-segmental-average-pooling-of-self-supervised-learning-features-2410.12416"/></url>
<url><loc>https://scifaro.com/en/abs/heightceleb-an-enrichment-of-voxceleb-dataset-with-speaker-height-information-2410.12668</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heightceleb-an-enrichment-of-voxceleb-dataset-with-speaker-height-information-2410.12668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heightceleb-an-enrichment-of-voxceleb-dataset-with-speaker-height-information-2410.12668"/></url>
<url><loc>https://scifaro.com/en/abs/towards-computational-analysis-of-pansori-singing-2410.12956</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-computational-analysis-of-pansori-singing-2410.12956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-computational-analysis-of-pansori-singing-2410.12956"/></url>
<url><loc>https://scifaro.com/en/abs/muvi-video-to-music-generation-with-semantic-alignment-and-rhythmic-synchronization-2410.12957</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muvi-video-to-music-generation-with-semantic-alignment-and-rhythmic-synchronization-2410.12957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muvi-video-to-music-generation-with-semantic-alignment-and-rhythmic-synchronization-2410.12957"/></url>
<url><loc>https://scifaro.com/en/abs/aadnet-an-end-to-end-deep-learning-model-for-auditory-attention-decoding-2410.13059</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aadnet-an-end-to-end-deep-learning-model-for-auditory-attention-decoding-2410.13059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aadnet-an-end-to-end-deep-learning-model-for-auditory-attention-decoding-2410.13059"/></url>
<url><loc>https://scifaro.com/en/abs/sound-check-auditing-audio-datasets-2410.13114</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-check-auditing-audio-datasets-2410.13114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-check-auditing-audio-datasets-2410.13114"/></url>
<url><loc>https://scifaro.com/en/abs/eh-mam-easy-to-hard-masked-acoustic-modeling-for-self-supervised-speech-representation-learning-2410.13179</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eh-mam-easy-to-hard-masked-acoustic-modeling-for-self-supervised-speech-representation-learning-2410.13179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eh-mam-easy-to-hard-masked-acoustic-modeling-for-self-supervised-speech-representation-learning-2410.13179"/></url>
<url><loc>https://scifaro.com/en/abs/clamp-2-multimodal-music-information-retrieval-across-101-languages-using-large-language-models-2410.13267</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clamp-2-multimodal-music-information-retrieval-across-101-languages-using-large-language-models-2410.13267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clamp-2-multimodal-music-information-retrieval-across-101-languages-using-large-language-models-2410.13267"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-integration-of-speech-emotion-recognition-with-voice-activity-detection-using-self-supervised-learning-features-2410.13282</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-emotion-recognition-with-voice-activity-detection-using-self-supervised-learning-features-2410.13282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-emotion-recognition-with-voice-activity-detection-using-self-supervised-learning-features-2410.13282"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-1-second-3d-seld-performance-with-filter-bank-analysis-and-scconv-integration-in-cst-former-2410.13328</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-1-second-3d-seld-performance-with-filter-bank-analysis-and-scconv-integration-in-cst-former-2410.13328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-1-second-3d-seld-performance-with-filter-bank-analysis-and-scconv-integration-in-cst-former-2410.13328"/></url>
<url><loc>https://scifaro.com/en/abs/melotrans-a-text-to-symbolic-music-generation-model-following-human-composition-habit-2410.13419</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melotrans-a-text-to-symbolic-music-generation-model-following-human-composition-habit-2410.13419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melotrans-a-text-to-symbolic-music-generation-model-following-human-composition-habit-2410.13419"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-range-compression-and-its-effect-on-music-genre-classification-2410.13581</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-range-compression-and-its-effect-on-music-genre-classification-2410.13581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-range-compression-and-its-effect-on-music-genre-classification-2410.13581"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-codec-based-speech-synthesis-with-multi-token-prediction-and-speculative-decoding-2410.13839</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-codec-based-speech-synthesis-with-multi-token-prediction-and-speculative-decoding-2410.13839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-codec-based-speech-synthesis-with-multi-token-prediction-and-speculative-decoding-2410.13839"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-spatial-knowledge-understanding-for-immersive-visual-text-to-speech-2410.14101</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-spatial-knowledge-understanding-for-immersive-visual-text-to-speech-2410.14101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-spatial-knowledge-understanding-for-immersive-visual-text-to-speech-2410.14101"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-transcription-exploring-noise-injection-strategies-for-training-data-augmentation-2410.14122</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-transcription-exploring-noise-injection-strategies-for-training-data-augmentation-2410.14122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-transcription-exploring-noise-injection-strategies-for-training-data-augmentation-2410.14122"/></url>
<url><loc>https://scifaro.com/en/abs/snac-multi-scale-neural-audio-codec-2410.14411</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snac-multi-scale-neural-audio-codec-2410.14411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snac-multi-scale-neural-audio-codec-2410.14411"/></url>
<url><loc>https://scifaro.com/en/abs/embodied-exploration-of-latent-spaces-and-explainable-ai-2410.14590</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embodied-exploration-of-latent-spaces-and-explainable-ai-2410.14590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embodied-exploration-of-latent-spaces-and-explainable-ai-2410.14590"/></url>
<url><loc>https://scifaro.com/en/abs/immersediffusion-a-generative-spatial-audio-latent-diffusion-model-2410.14945</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/immersediffusion-a-generative-spatial-audio-latent-diffusion-model-2410.14945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/immersediffusion-a-generative-spatial-audio-latent-diffusion-model-2410.14945"/></url>
<url><loc>https://scifaro.com/en/abs/audio-processing-using-pattern-recognition-for-music-genre-classification-2410.14990</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-processing-using-pattern-recognition-for-music-genre-classification-2410.14990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-processing-using-pattern-recognition-for-music-genre-classification-2410.14990"/></url>
<url><loc>https://scifaro.com/en/abs/improving-pronunciation-and-accent-conversion-through-knowledge-distillation-and-synthetic-ground-truth-from-native-tts-2410.14997</loc><lastmod>2025-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-pronunciation-and-accent-conversion-through-knowledge-distillation-and-synthetic-ground-truth-from-native-tts-2410.14997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-pronunciation-and-accent-conversion-through-knowledge-distillation-and-synthetic-ground-truth-from-native-tts-2410.14997"/></url>
<url><loc>https://scifaro.com/en/abs/pat-parameter-free-audio-text-aligner-to-boost-zero-shot-audio-classification-2410.15062</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pat-parameter-free-audio-text-aligner-to-boost-zero-shot-audio-classification-2410.15062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pat-parameter-free-audio-text-aligner-to-boost-zero-shot-audio-classification-2410.15062"/></url>
<url><loc>https://scifaro.com/en/abs/consinger-efficient-high-fidelity-singing-voice-generation-with-minimal-steps-2410.15342</loc><lastmod>2025-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consinger-efficient-high-fidelity-singing-voice-generation-with-minimal-steps-2410.15342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consinger-efficient-high-fidelity-singing-voice-generation-with-minimal-steps-2410.15342"/></url>
<url><loc>https://scifaro.com/en/abs/construction-and-analysis-of-impression-caption-dataset-for-environmental-sounds-2410.15532</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/construction-and-analysis-of-impression-caption-dataset-for-environmental-sounds-2410.15532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/construction-and-analysis-of-impression-caption-dataset-for-environmental-sounds-2410.15532"/></url>
<url><loc>https://scifaro.com/en/abs/openmu-your-swiss-army-knife-for-music-understanding-2410.15573</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/openmu-your-swiss-army-knife-for-music-understanding-2410.15573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/openmu-your-swiss-army-knife-for-music-understanding-2410.15573"/></url>
<url><loc>https://scifaro.com/en/abs/aldas-audio-linguistic-data-augmentation-for-spoofed-audio-detection-2410.15577</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aldas-audio-linguistic-data-augmentation-for-spoofed-audio-detection-2410.15577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aldas-audio-linguistic-data-augmentation-for-spoofed-audio-detection-2410.15577"/></url>
<url><loc>https://scifaro.com/en/abs/moonshine-speech-recognition-for-live-transcription-and-voice-commands-2410.15608</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moonshine-speech-recognition-for-live-transcription-and-voice-commands-2410.15608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moonshine-speech-recognition-for-live-transcription-and-voice-commands-2410.15608"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-model-optimization-over-multiple-data-sources-merging-and-valuation-2410.15620</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-model-optimization-over-multiple-data-sources-merging-and-valuation-2410.15620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-model-optimization-over-multiple-data-sources-merging-and-valuation-2410.15620"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-neural-speech-codec-for-low-bitrate-compression-via-multi-scale-encoding-2410.15749</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-neural-speech-codec-for-low-bitrate-compression-via-multi-scale-encoding-2410.15749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-neural-speech-codec-for-low-bitrate-compression-via-multi-scale-encoding-2410.15749"/></url>
<url><loc>https://scifaro.com/en/abs/neural-scoring-a-refreshed-end-to-end-approach-for-speaker-recognition-in-complex-conditions-2410.16428</loc><lastmod>2025-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-scoring-a-refreshed-end-to-end-approach-for-speaker-recognition-in-complex-conditions-2410.16428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-scoring-a-refreshed-end-to-end-approach-for-speaker-recognition-in-complex-conditions-2410.16428"/></url>
<url><loc>https://scifaro.com/en/abs/alignvsr-audio-visual-cross-modal-alignment-for-visual-speech-recognition-2410.16438</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignvsr-audio-visual-cross-modal-alignment-for-visual-speech-recognition-2410.16438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignvsr-audio-visual-cross-modal-alignment-for-visual-speech-recognition-2410.16438"/></url>
<url><loc>https://scifaro.com/en/abs/do-audio-language-models-understand-linguistic-variations-2410.16505</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-audio-language-models-understand-linguistic-variations-2410.16505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-audio-language-models-understand-linguistic-variations-2410.16505"/></url>
<url><loc>https://scifaro.com/en/abs/denoasr-debiasing-asrs-through-selective-denoising-2410.16712</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoasr-debiasing-asrs-through-selective-denoising-2410.16712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoasr-debiasing-asrs-through-selective-denoising-2410.16712"/></url>
<url><loc>https://scifaro.com/en/abs/annotation-free-midi-to-audio-synthesis-via-concatenative-synthesis-and-generative-refinement-2410.16785</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/annotation-free-midi-to-audio-synthesis-via-concatenative-synthesis-and-generative-refinement-2410.16785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/annotation-free-midi-to-audio-synthesis-via-concatenative-synthesis-and-generative-refinement-2410.16785"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-feature-learning-in-weakly-labelled-bioacoustic-cetacean-datasets-via-a-variational-autoencoder-and-temporal-convolutional-network-an-interdisciplinary-approach-2410.17006</loc><lastmod>2025-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-feature-learning-in-weakly-labelled-bioacoustic-cetacean-datasets-via-a-variational-autoencoder-and-temporal-convolutional-network-an-interdisciplinary-approach-2410.17006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-feature-learning-in-weakly-labelled-bioacoustic-cetacean-datasets-via-a-variational-autoencoder-and-temporal-convolutional-network-an-interdisciplinary-approach-2410.17006"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-tokenizer-in-text-to-speech-2410.17081</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-tokenizer-in-text-to-speech-2410.17081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-tokenizer-in-text-to-speech-2410.17081"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-score-conversion-model-based-on-whisper-methodology-2410.17209</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-score-conversion-model-based-on-whisper-methodology-2410.17209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-score-conversion-model-based-on-whisper-methodology-2410.17209"/></url>
<url><loc>https://scifaro.com/en/abs/discogs-vi-a-musical-version-identification-dataset-based-on-public-editorial-metadata-2410.17400</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discogs-vi-a-musical-version-identification-dataset-based-on-public-editorial-metadata-2410.17400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discogs-vi-a-musical-version-identification-dataset-based-on-public-editorial-metadata-2410.17400"/></url>
<url><loc>https://scifaro.com/en/abs/mmwave-whisper-phone-call-eavesdropping-and-transcription-using-millimeter-wave-radar-2410.17457</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmwave-whisper-phone-call-eavesdropping-and-transcription-using-millimeter-wave-radar-2410.17457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmwave-whisper-phone-call-eavesdropping-and-transcription-using-millimeter-wave-radar-2410.17457"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-tokenization-methods-for-multitrack-sheet-music-generation-2410.17584</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-tokenization-methods-for-multitrack-sheet-music-generation-2410.17584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-tokenization-methods-for-multitrack-sheet-music-generation-2410.17584"/></url>
<url><loc>https://scifaro.com/en/abs/challenge-on-sound-scene-synthesis-evaluating-text-to-audio-generation-2410.17589</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/challenge-on-sound-scene-synthesis-evaluating-text-to-audio-generation-2410.17589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/challenge-on-sound-scene-synthesis-evaluating-text-to-audio-generation-2410.17589"/></url>
<url><loc>https://scifaro.com/en/abs/music102-an-d-12-equivariant-transformer-for-chord-progression-accompaniment-2410.18151</loc><lastmod>2026-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music102-an-d-12-equivariant-transformer-for-chord-progression-accompaniment-2410.18151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music102-an-d-12-equivariant-transformer-for-chord-progression-accompaniment-2410.18151"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-melody-construction-for-persian-lyrics-using-lstm-recurrent-neural-networks-2410.18203</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-melody-construction-for-persian-lyrics-using-lstm-recurrent-neural-networks-2410.18203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-melody-construction-for-persian-lyrics-using-lstm-recurrent-neural-networks-2410.18203"/></url>
<url><loc>https://scifaro.com/en/abs/unified-microphone-conversion-many-to-many-device-mapping-via-feature-wise-linear-modulation-2410.18322</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-microphone-conversion-many-to-many-device-mapping-via-feature-wise-linear-modulation-2410.18322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-microphone-conversion-many-to-many-device-mapping-via-feature-wise-linear-modulation-2410.18322"/></url>
<url><loc>https://scifaro.com/en/abs/gibberish-is-all-you-need-for-membership-inference-detection-in-contrastive-language-audio-pretraining-2410.18371</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gibberish-is-all-you-need-for-membership-inference-detection-in-contrastive-language-audio-pretraining-2410.18371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gibberish-is-all-you-need-for-membership-inference-detection-in-contrastive-language-audio-pretraining-2410.18371"/></url>
<url><loc>https://scifaro.com/en/abs/wavetable-synthesis-using-cvae-for-timbre-control-based-on-semantic-label-2410.18628</loc><lastmod>2024-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavetable-synthesis-using-cvae-for-timbre-control-based-on-semantic-label-2410.18628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavetable-synthesis-using-cvae-for-timbre-control-based-on-semantic-label-2410.18628"/></url>
<url><loc>https://scifaro.com/en/abs/closermusicdb-a-modern-multipurpose-dataset-of-high-quality-music-2410.19540</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/closermusicdb-a-modern-multipurpose-dataset-of-high-quality-music-2410.19540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/closermusicdb-a-modern-multipurpose-dataset-of-high-quality-music-2410.19540"/></url>
<url><loc>https://scifaro.com/en/abs/arabic-music-classification-and-generation-using-deep-learning-2410.19719</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arabic-music-classification-and-generation-using-deep-learning-2410.19719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arabic-music-classification-and-generation-using-deep-learning-2410.19719"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-convolution-based-hybrid-model-approach-with-representation-learning-for-real-time-acoustic-anomaly-detection-2410.19722</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-convolution-based-hybrid-model-approach-with-representation-learning-for-real-time-acoustic-anomaly-detection-2410.19722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-convolution-based-hybrid-model-approach-with-representation-learning-for-real-time-acoustic-anomaly-detection-2410.19722"/></url>
<url><loc>https://scifaro.com/en/abs/an-approach-to-hummed-tune-and-song-sequences-matching-2410.20352</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-approach-to-hummed-tune-and-song-sequences-matching-2410.20352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-approach-to-hummed-tune-and-song-sequences-matching-2410.20352"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-gan-for-enhancing-diffusion-models-in-efficient-and-authentic-global-gesture-generation-from-audios-2410.20359</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-gan-for-enhancing-diffusion-models-in-efficient-and-authentic-global-gesture-generation-from-audios-2410.20359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-gan-for-enhancing-diffusion-models-in-efficient-and-authentic-global-gesture-generation-from-audios-2410.20359"/></url>
<url><loc>https://scifaro.com/en/abs/musicflow-cascaded-flow-matching-for-text-guided-music-generation-2410.20478</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicflow-cascaded-flow-matching-for-text-guided-music-generation-2410.20478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicflow-cascaded-flow-matching-for-text-guided-music-generation-2410.20478"/></url>
<url><loc>https://scifaro.com/en/abs/symbotunes-unified-hub-for-symbolic-music-generative-models-2410.20515</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbotunes-unified-hub-for-symbolic-music-generative-models-2410.20515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbotunes-unified-hub-for-symbolic-music-generative-models-2410.20515"/></url>
<url><loc>https://scifaro.com/en/abs/miditok-visualizer-a-tool-for-visualization-and-analysis-of-tokenized-midi-symbolic-music-2410.20518</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/miditok-visualizer-a-tool-for-visualization-and-analysis-of-tokenized-midi-symbolic-music-2410.20518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/miditok-visualizer-a-tool-for-visualization-and-analysis-of-tokenized-midi-symbolic-music-2410.20518"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-estimation-of-singing-voice-musical-dynamics-2410.20540</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-estimation-of-singing-voice-musical-dynamics-2410.20540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-estimation-of-singing-voice-musical-dynamics-2410.20540"/></url>
<url><loc>https://scifaro.com/en/abs/mitigating-unauthorized-speech-synthesis-for-voice-protection-2410.20742</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mitigating-unauthorized-speech-synthesis-for-voice-protection-2410.20742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mitigating-unauthorized-speech-synthesis-for-voice-protection-2410.20742"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-approach-to-music-source-separation-a-comparative-analysis-of-conventional-and-hierarchical-stem-separation-2410.20773</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-approach-to-music-source-separation-a-comparative-analysis-of-conventional-and-hierarchical-stem-separation-2410.20773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-approach-to-music-source-separation-a-comparative-analysis-of-conventional-and-hierarchical-stem-separation-2410.20773"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-via-distilling-and-progressive-pruning-2410.20775</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-via-distilling-and-progressive-pruning-2410.20775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-via-distilling-and-progressive-pruning-2410.20775"/></url>
<url><loc>https://scifaro.com/en/abs/atrial-fibrillation-detection-system-via-acoustic-sensing-for-mobile-phones-2410.20852</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atrial-fibrillation-detection-system-via-acoustic-sensing-for-mobile-phones-2410.20852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atrial-fibrillation-detection-system-via-acoustic-sensing-for-mobile-phones-2410.20852"/></url>
<url><loc>https://scifaro.com/en/abs/sepmamba-state-space-models-for-speaker-separation-using-mamba-2410.20997</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sepmamba-state-space-models-for-speaker-separation-using-mamba-2410.20997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sepmamba-state-space-models-for-speaker-separation-using-mamba-2410.20997"/></url>
<url><loc>https://scifaro.com/en/abs/st-ito-controlling-audio-effects-for-style-transfer-with-inference-time-optimization-2410.21233</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/st-ito-controlling-audio-effects-for-style-transfer-with-inference-time-optimization-2410.21233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/st-ito-controlling-audio-effects-for-style-transfer-with-inference-time-optimization-2410.21233"/></url>
<url><loc>https://scifaro.com/en/abs/omnisep-unified-omni-modality-sound-separation-with-query-mixup-2410.21269</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omnisep-unified-omni-modality-sound-separation-with-query-mixup-2410.21269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omnisep-unified-omni-modality-sound-separation-with-query-mixup-2410.21269"/></url>
<url><loc>https://scifaro.com/en/abs/producer-vs-rapper-who-dominates-the-hip-hop-sound-a-case-study-2410.21297</loc><lastmod>2025-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/producer-vs-rapper-who-dominates-the-hip-hop-sound-a-case-study-2410.21297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/producer-vs-rapper-who-dominates-the-hip-hop-sound-a-case-study-2410.21297"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-real-time-classification-of-early-media-in-voice-communications-2410.21478</loc><lastmod>2025-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-real-time-classification-of-early-media-in-voice-communications-2410.21478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-real-time-classification-of-early-media-in-voice-communications-2410.21478"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-tts-stability-in-hebrew-using-discrete-semantic-units-2410.21502</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-tts-stability-in-hebrew-using-discrete-semantic-units-2410.21502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-tts-stability-in-hebrew-using-discrete-semantic-units-2410.21502"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-score-cam-based-denoiser-for-spectrographic-signature-extraction-without-ground-truth-2410.21557</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-score-cam-based-denoiser-for-spectrographic-signature-extraction-without-ground-truth-2410.21557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-score-cam-based-denoiser-for-spectrographic-signature-extraction-without-ground-truth-2410.21557"/></url>
<url><loc>https://scifaro.com/en/abs/audio-classification-of-low-feature-spectrograms-utilizing-convolutional-neural-networks-2410.21561</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-classification-of-low-feature-spectrograms-utilizing-convolutional-neural-networks-2410.21561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-classification-of-low-feature-spectrograms-utilizing-convolutional-neural-networks-2410.21561"/></url>
<url><loc>https://scifaro.com/en/abs/rdsinger-reference-based-diffusion-network-for-singing-voice-synthesis-2410.21641</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rdsinger-reference-based-diffusion-network-for-singing-voice-synthesis-2410.21641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rdsinger-reference-based-diffusion-network-for-singing-voice-synthesis-2410.21641"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-self-learning-enhanced-music-emotion-recognition-2410.21897</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-self-learning-enhanced-music-emotion-recognition-2410.21897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-self-learning-enhanced-music-emotion-recognition-2410.21897"/></url>
<url><loc>https://scifaro.com/en/abs/chordonomicon-a-dataset-of-666-000-songs-and-their-chord-progressions-2410.22046</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chordonomicon-a-dataset-of-666-000-songs-and-their-chord-progressions-2410.22046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chordonomicon-a-dataset-of-666-000-songs-and-their-chord-progressions-2410.22046"/></url>
<url><loc>https://scifaro.com/en/abs/uspeech-ultrasound-enhanced-speech-with-minimal-human-effort-via-cross-modal-synthesis-2410.22076</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uspeech-ultrasound-enhanced-speech-with-minimal-human-effort-via-cross-modal-synthesis-2410.22076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uspeech-ultrasound-enhanced-speech-with-minimal-human-effort-via-cross-modal-synthesis-2410.22076"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-guided-image-to-music-generation-2410.22299</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-guided-image-to-music-generation-2410.22299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-guided-image-to-music-generation-2410.22299"/></url>
<url><loc>https://scifaro.com/en/abs/doa-aware-audio-visual-self-supervised-learning-for-sound-event-localization-and-detection-2410.22803</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/doa-aware-audio-visual-self-supervised-learning-for-sound-event-localization-and-detection-2410.22803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/doa-aware-audio-visual-self-supervised-learning-for-sound-event-localization-and-detection-2410.22803"/></url>
<url><loc>https://scifaro.com/en/abs/run-time-adaptation-of-neural-beamforming-for-robust-speech-dereverberation-and-denoising-2410.22805</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/run-time-adaptation-of-neural-beamforming-for-robust-speech-dereverberation-and-denoising-2410.22805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/run-time-adaptation-of-neural-beamforming-for-robust-speech-dereverberation-and-denoising-2410.22805"/></url>
<url><loc>https://scifaro.com/en/abs/improving-musical-accompaniment-co-creation-via-diffusion-transformers-2410.23005</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-musical-accompaniment-co-creation-via-diffusion-transformers-2410.23005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-musical-accompaniment-co-creation-via-diffusion-transformers-2410.23005"/></url>
<url><loc>https://scifaro.com/en/abs/soundcollage-automated-discovery-of-new-classes-in-audio-datasets-2410.23008</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundcollage-automated-discovery-of-new-classes-in-audio-datasets-2410.23008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundcollage-automated-discovery-of-new-classes-in-audio-datasets-2410.23008"/></url>
<url><loc>https://scifaro.com/en/abs/learning-marmoset-vocal-patterns-with-a-masked-autoencoder-for-robust-call-segmentation-classification-and-caller-identification-2410.23279</loc><lastmod>2025-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-marmoset-vocal-patterns-with-a-masked-autoencoder-for-robust-call-segmentation-classification-and-caller-identification-2410.23279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-marmoset-vocal-patterns-with-a-masked-autoencoder-for-robust-call-segmentation-classification-and-caller-identification-2410.23279"/></url>
<url><loc>https://scifaro.com/en/abs/neurobench-dcase-2020-acoustic-scene-classification-benchmark-on-xyloaudio-2-2410.23776</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neurobench-dcase-2020-acoustic-scene-classification-benchmark-on-xyloaudio-2-2410.23776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neurobench-dcase-2020-acoustic-scene-classification-benchmark-on-xyloaudio-2-2410.23776"/></url>
<url><loc>https://scifaro.com/en/abs/improving-snore-detection-under-limited-dataset-through-harmonic-percussive-source-separation-and-convolutional-neural-networks-2410.23796</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-snore-detection-under-limited-dataset-through-harmonic-percussive-source-separation-and-convolutional-neural-networks-2410.23796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-snore-detection-under-limited-dataset-through-harmonic-percussive-source-separation-and-convolutional-neural-networks-2410.23796"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-hwc-system-for-the-iscslp-2024-inspirational-and-convincing-audio-generation-challenge-2410.23815</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-hwc-system-for-the-iscslp-2024-inspirational-and-convincing-audio-generation-challenge-2410.23815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-hwc-system-for-the-iscslp-2024-inspirational-and-convincing-audio-generation-challenge-2410.23815"/></url>
<url><loc>https://scifaro.com/en/abs/the-iscslp-2024-conversational-voice-clone-covoc-challenge-tasks-results-and-findings-2411.00064</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-iscslp-2024-conversational-voice-clone-covoc-challenge-tasks-results-and-findings-2411.00064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-iscslp-2024-conversational-voice-clone-covoc-challenge-tasks-results-and-findings-2411.00064"/></url>
<url><loc>https://scifaro.com/en/abs/i-can-hear-you-selective-robust-training-for-deepfake-audio-detection-2411.00121</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-can-hear-you-selective-robust-training-for-deepfake-audio-detection-2411.00121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-can-hear-you-selective-robust-training-for-deepfake-audio-detection-2411.00121"/></url>
<url><loc>https://scifaro.com/en/abs/angular-distance-distribution-loss-for-audio-classification-2411.00153</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/angular-distance-distribution-loss-for-audio-classification-2411.00153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/angular-distance-distribution-loss-for-audio-classification-2411.00153"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-framework-for-audio-based-content-evaluation-using-mfcc-chroma-spectral-contrast-and-temporal-feature-engineering-2411.00195</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-framework-for-audio-based-content-evaluation-using-mfcc-chroma-spectral-contrast-and-temporal-feature-engineering-2411.00195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-framework-for-audio-based-content-evaluation-using-mfcc-chroma-spectral-contrast-and-temporal-feature-engineering-2411.00195"/></url>
<url><loc>https://scifaro.com/en/abs/improving-musical-instrument-classification-with-advanced-machine-learning-techniques-2411.00275</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-musical-instrument-classification-with-advanced-machine-learning-techniques-2411.00275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-musical-instrument-classification-with-advanced-machine-learning-techniques-2411.00275"/></url>
<url><loc>https://scifaro.com/en/abs/mace-leveraging-audio-for-evaluating-audio-captioning-systems-2411.00321</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mace-leveraging-audio-for-evaluating-audio-captioning-systems-2411.00321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mace-leveraging-audio-for-evaluating-audio-captioning-systems-2411.00321"/></url>
<url><loc>https://scifaro.com/en/abs/mdctcodec-a-lightweight-mdct-based-neural-audio-codec-towards-high-sampling-rate-and-low-bitrate-scenarios-2411.00464</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdctcodec-a-lightweight-mdct-based-neural-audio-codec-towards-high-sampling-rate-and-low-bitrate-scenarios-2411.00464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdctcodec-a-lightweight-mdct-based-neural-audio-codec-towards-high-sampling-rate-and-low-bitrate-scenarios-2411.00464"/></url>
<url><loc>https://scifaro.com/en/abs/mirflex-music-information-retrieval-feature-library-for-extraction-2411.00469</loc><lastmod>2025-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mirflex-music-information-retrieval-feature-library-for-extraction-2411.00469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mirflex-music-information-retrieval-feature-library-for-extraction-2411.00469"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-information-fusion-of-acoustic-and-linguistic-data-for-decoding-dairy-cow-vocalizations-in-animal-welfare-assessment-2411.00477</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-information-fusion-of-acoustic-and-linguistic-data-for-decoding-dairy-cow-vocalizations-in-animal-welfare-assessment-2411.00477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-information-fusion-of-acoustic-and-linguistic-data-for-decoding-dairy-cow-vocalizations-in-animal-welfare-assessment-2411.00477"/></url>
<url><loc>https://scifaro.com/en/abs/freeze-omni-a-smart-and-low-latency-speech-to-speech-dialogue-model-with-frozen-llm-2411.00774</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freeze-omni-a-smart-and-low-latency-speech-to-speech-dialogue-model-with-frozen-llm-2411.00774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freeze-omni-a-smart-and-low-latency-speech-to-speech-dialogue-model-with-frozen-llm-2411.00774"/></url>
<url><loc>https://scifaro.com/en/abs/music-foundation-model-as-generic-booster-for-music-downstream-tasks-2411.01135</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-foundation-model-as-generic-booster-for-music-downstream-tasks-2411.01135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-foundation-model-as-generic-booster-for-music-downstream-tasks-2411.01135"/></url>
<url><loc>https://scifaro.com/en/abs/fish-speech-leveraging-large-language-models-for-advanced-multilingual-text-to-speech-synthesis-2411.01156</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fish-speech-leveraging-large-language-models-for-advanced-multilingual-text-to-speech-synthesis-2411.01156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fish-speech-leveraging-large-language-models-for-advanced-multilingual-text-to-speech-synthesis-2411.01156"/></url>
<url><loc>https://scifaro.com/en/abs/sing-on-your-beat-simple-text-controllable-accompaniment-generations-2411.01661</loc><lastmod>2024-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sing-on-your-beat-simple-text-controllable-accompaniment-generations-2411.01661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sing-on-your-beat-simple-text-controllable-accompaniment-generations-2411.01661"/></url>
<url><loc>https://scifaro.com/en/abs/momu-diffusion-on-learning-long-term-motion-music-synchronization-and-correspondence-2411.01805</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/momu-diffusion-on-learning-long-term-motion-music-synchronization-and-correspondence-2411.01805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/momu-diffusion-on-learning-long-term-motion-music-synchronization-and-correspondence-2411.01805"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-voice-conversion-via-content-aware-timbre-ensemble-and-conditional-flow-matching-2411.02026</loc><lastmod>2025-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-via-content-aware-timbre-ensemble-and-conditional-flow-matching-2411.02026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-via-content-aware-timbre-ensemble-and-conditional-flow-matching-2411.02026"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-transport-maps-are-good-voice-converters-2411.02402</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-transport-maps-are-good-voice-converters-2411.02402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-transport-maps-are-good-voice-converters-2411.02402"/></url>
<url><loc>https://scifaro.com/en/abs/piast-a-multimodal-piano-dataset-with-audio-symbolic-and-text-2411.02551</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/piast-a-multimodal-piano-dataset-with-audio-symbolic-and-text-2411.02551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/piast-a-multimodal-piano-dataset-with-audio-symbolic-and-text-2411.02551"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-the-number-and-locations-of-boundaries-in-reverberant-environments-with-deep-learning-2411.02609</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-the-number-and-locations-of-boundaries-in-reverberant-environments-with-deep-learning-2411.02609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-the-number-and-locations-of-boundaries-in-reverberant-environments-with-deep-learning-2411.02609"/></url>
<url><loc>https://scifaro.com/en/abs/emosphere-emotion-controllable-zero-shot-text-to-speech-via-emotion-adaptive-spherical-vector-2411.02625</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emosphere-emotion-controllable-zero-shot-text-to-speech-via-emotion-adaptive-spherical-vector-2411.02625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emosphere-emotion-controllable-zero-shot-text-to-speech-via-emotion-adaptive-spherical-vector-2411.02625"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-multi-view-learning-for-disentangled-music-audio-representations-2411.02711</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-multi-view-learning-for-disentangled-music-audio-representations-2411.02711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-multi-view-learning-for-disentangled-music-audio-representations-2411.02711"/></url>
<url><loc>https://scifaro.com/en/abs/demonet-underwater-acoustic-target-recognition-based-on-multi-expert-network-and-cross-temporal-variational-autoencoder-2411.02758</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/demonet-underwater-acoustic-target-recognition-based-on-multi-expert-network-and-cross-temporal-variational-autoencoder-2411.02758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/demonet-underwater-acoustic-target-recognition-based-on-multi-expert-network-and-cross-temporal-variational-autoencoder-2411.02758"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-robust-underwater-acoustic-target-recognition-through-multi-task-learning-and-multi-gate-mixture-of-experts-2411.02787</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-robust-underwater-acoustic-target-recognition-through-multi-task-learning-and-multi-gate-mixture-of-experts-2411.02787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-robust-underwater-acoustic-target-recognition-through-multi-task-learning-and-multi-gate-mixture-of-experts-2411.02787"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-multi-task-underwater-acoustic-target-recognition-towards-robustness-against-various-influential-factors-2411.02848</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-multi-task-underwater-acoustic-target-recognition-towards-robustness-against-various-influential-factors-2411.02848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-multi-task-underwater-acoustic-target-recognition-towards-robustness-against-various-influential-factors-2411.02848"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-emotion-recognition-leveraging-self-supervised-models-for-feature-extraction-using-wav2vec2-and-hubert-2411.02964</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-emotion-recognition-leveraging-self-supervised-models-for-feature-extraction-using-wav2vec2-and-hubert-2411.02964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-emotion-recognition-leveraging-self-supervised-models-for-feature-extraction-using-wav2vec2-and-hubert-2411.02964"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-scream-detection-and-position-estimation-for-worker-safety-in-construction-sites-2411.03016</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-scream-detection-and-position-estimation-for-worker-safety-in-construction-sites-2411.03016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-scream-detection-and-position-estimation-for-worker-safety-in-construction-sites-2411.03016"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-with-pretrained-frontend-to-minimize-domain-mismatch-2411.03085</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-with-pretrained-frontend-to-minimize-domain-mismatch-2411.03085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-with-pretrained-frontend-to-minimize-domain-mismatch-2411.03085"/></url>
<url><loc>https://scifaro.com/en/abs/ptse-t-presentation-target-speaker-extraction-using-unaligned-text-cues-2411.03109</loc><lastmod>2026-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ptse-t-presentation-target-speaker-extraction-using-unaligned-text-cues-2411.03109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ptse-t-presentation-target-speaker-extraction-using-unaligned-text-cues-2411.03109"/></url>
<url><loc>https://scifaro.com/en/abs/mobile-recording-device-recognition-based-cross-scale-and-multi-level-representation-learning-2411.03668</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mobile-recording-device-recognition-based-cross-scale-and-multi-level-representation-learning-2411.03668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mobile-recording-device-recognition-based-cross-scale-and-multi-level-representation-learning-2411.03668"/></url>
<url><loc>https://scifaro.com/en/abs/mos-bench-benchmarking-generalization-abilities-of-subjective-speech-quality-assessment-models-2411.03715</loc><lastmod>2026-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mos-bench-benchmarking-generalization-abilities-of-subjective-speech-quality-assessment-models-2411.03715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mos-bench-benchmarking-generalization-abilities-of-subjective-speech-quality-assessment-models-2411.03715"/></url>
<url><loc>https://scifaro.com/en/abs/long-form-text-to-music-generation-with-adaptive-prompts-a-case-study-in-tabletop-role-playing-games-soundtracks-2411.03948</loc><lastmod>2025-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-form-text-to-music-generation-with-adaptive-prompts-a-case-study-in-tabletop-role-playing-games-soundtracks-2411.03948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-form-text-to-music-generation-with-adaptive-prompts-a-case-study-in-tabletop-role-playing-games-soundtracks-2411.03948"/></url>
<url><loc>https://scifaro.com/en/abs/neural-enhanced-dynamic-range-compression-inversion-a-hybrid-approach-for-restoring-audio-dynamics-2411.04337</loc><lastmod>2025-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-enhanced-dynamic-range-compression-inversion-a-hybrid-approach-for-restoring-audio-dynamics-2411.04337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-enhanced-dynamic-range-compression-inversion-a-hybrid-approach-for-restoring-audio-dynamics-2411.04337"/></url>
<url><loc>https://scifaro.com/en/abs/the-concatenator-a-bayesian-approach-to-real-time-concatenative-musaicing-2411.04366</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-concatenator-a-bayesian-approach-to-real-time-concatenative-musaicing-2411.04366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-concatenator-a-bayesian-approach-to-real-time-concatenative-musaicing-2411.04366"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-neural-networks-trained-on-noisy-speech-exhibit-the-mcgurk-effect-2411.05715</loc><lastmod>2025-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-neural-networks-trained-on-noisy-speech-exhibit-the-mcgurk-effect-2411.05715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-neural-networks-trained-on-noisy-speech-exhibit-the-mcgurk-effect-2411.05715"/></url>
<url><loc>https://scifaro.com/en/abs/toward-transdisciplinary-approaches-to-audio-deepfake-discernment-2411.05969</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-transdisciplinary-approaches-to-audio-deepfake-discernment-2411.05969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-transdisciplinary-approaches-to-audio-deepfake-discernment-2411.05969"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-volume-rendering-for-neural-impulse-response-fields-2411.06307</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-volume-rendering-for-neural-impulse-response-fields-2411.06307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-volume-rendering-for-neural-impulse-response-fields-2411.06307"/></url>
<url><loc>https://scifaro.com/en/abs/wavehax-aliasing-free-neural-waveform-synthesis-based-on-2d-convolution-and-harmonic-prior-for-reliable-complex-spectrogram-estimation-2411.06807</loc><lastmod>2025-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavehax-aliasing-free-neural-waveform-synthesis-based-on-2d-convolution-and-harmonic-prior-for-reliable-complex-spectrogram-estimation-2411.06807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavehax-aliasing-free-neural-waveform-synthesis-based-on-2d-convolution-and-harmonic-prior-for-reliable-complex-spectrogram-estimation-2411.06807"/></url>
<url><loc>https://scifaro.com/en/abs/timing-and-dynamics-of-the-rosanna-shuffle-2411.06892</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timing-and-dynamics-of-the-rosanna-shuffle-2411.06892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timing-and-dynamics-of-the-rosanna-shuffle-2411.06892"/></url>
<url><loc>https://scifaro.com/en/abs/multi-class-decoding-of-attended-speaker-direction-using-electroencephalogram-and-audio-spatial-spectrum-2411.06928</loc><lastmod>2025-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-class-decoding-of-attended-speaker-direction-using-electroencephalogram-and-audio-spatial-spectrum-2411.06928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-class-decoding-of-attended-speaker-direction-using-electroencephalogram-and-audio-spatial-spectrum-2411.06928"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-based-decoder-only-approach-with-bidirectional-speech-modeling-for-speech-recognition-2411.06968</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-based-decoder-only-approach-with-bidirectional-speech-modeling-for-speech-recognition-2411.06968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-based-decoder-only-approach-with-bidirectional-speech-modeling-for-speech-recognition-2411.06968"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-based-3d-human-pose-estimation-robust-to-human-position-2411.07165</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-based-3d-human-pose-estimation-robust-to-human-position-2411.07165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-based-3d-human-pose-estimation-robust-to-human-position-2411.07165"/></url>
<url><loc>https://scifaro.com/en/abs/naturelm-audio-an-audio-language-foundation-model-for-bioacoustics-2411.07186</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturelm-audio-an-audio-language-foundation-model-for-bioacoustics-2411.07186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturelm-audio-an-audio-language-foundation-model-for-bioacoustics-2411.07186"/></url>
<url><loc>https://scifaro.com/en/abs/just-label-the-repeats-for-in-the-wild-audio-to-score-alignment-2411.07428</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/just-label-the-repeats-for-in-the-wild-audio-to-score-alignment-2411.07428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/just-label-the-repeats-for-in-the-wild-audio-to-score-alignment-2411.07428"/></url>
<url><loc>https://scifaro.com/en/abs/music-discovery-dialogue-generation-using-human-intent-analysis-and-large-language-models-2411.07439</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-discovery-dialogue-generation-using-human-intent-analysis-and-large-language-models-2411.07439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-discovery-dialogue-generation-using-human-intent-analysis-and-large-language-models-2411.07439"/></url>
<url><loc>https://scifaro.com/en/abs/a-generalist-audio-foundation-model-for-comprehensive-body-sound-auscultation-2411.07547</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generalist-audio-foundation-model-for-comprehensive-body-sound-auscultation-2411.07547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generalist-audio-foundation-model-for-comprehensive-body-sound-auscultation-2411.07547"/></url>
<url><loc>https://scifaro.com/en/abs/sav-se-scene-aware-audio-visual-speech-enhancement-with-selective-state-space-model-2411.07751</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sav-se-scene-aware-audio-visual-speech-enhancement-with-selective-state-space-model-2411.07751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sav-se-scene-aware-audio-visual-speech-enhancement-with-selective-state-space-model-2411.07751"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-effectiveness-of-explainability-methods-in-parkinson-s-detection-from-speech-2411.08013</loc><lastmod>2024-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-effectiveness-of-explainability-methods-in-parkinson-s-detection-from-speech-2411.08013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-effectiveness-of-explainability-methods-in-parkinson-s-detection-from-speech-2411.08013"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-pitch-content-in-traditional-ghanaian-seperewa-songs-2411.08234</loc><lastmod>2024-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-pitch-content-in-traditional-ghanaian-seperewa-songs-2411.08234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-pitch-content-in-traditional-ghanaian-seperewa-songs-2411.08234"/></url>
<url><loc>https://scifaro.com/en/abs/developing-an-effective-training-dataset-to-enhance-the-performance-of-ai-based-speaker-separation-systems-2411.08375</loc><lastmod>2024-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-an-effective-training-dataset-to-enhance-the-performance-of-ai-based-speaker-separation-systems-2411.08375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-an-effective-training-dataset-to-enhance-the-performance-of-ai-based-speaker-separation-systems-2411.08375"/></url>
<url><loc>https://scifaro.com/en/abs/language-models-for-music-medicine-generation-2411.09080</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-models-for-music-medicine-generation-2411.09080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-models-for-music-medicine-generation-2411.09080"/></url>
<url><loc>https://scifaro.com/en/abs/robust-ai-synthesized-speech-detection-using-feature-decomposition-learning-and-synthesizer-feature-augmentation-2411.09167</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-ai-synthesized-speech-detection-using-feature-decomposition-learning-and-synthesizer-feature-augmentation-2411.09167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-ai-synthesized-speech-detection-using-feature-decomposition-learning-and-synthesizer-feature-augmentation-2411.09167"/></url>
<url><loc>https://scifaro.com/en/abs/eeg-based-speech-decoding-a-novel-approach-using-multi-kernel-ensemble-diffusion-models-2411.09302</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg-based-speech-decoding-a-novel-approach-using-multi-kernel-ensemble-diffusion-models-2411.09302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg-based-speech-decoding-a-novel-approach-using-multi-kernel-ensemble-diffusion-models-2411.09302"/></url>
<url><loc>https://scifaro.com/en/abs/re-parameterization-of-lightweight-transformer-for-on-device-speech-emotion-recognition-2411.09339</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/re-parameterization-of-lightweight-transformer-for-on-device-speech-emotion-recognition-2411.09339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/re-parameterization-of-lightweight-transformer-for-on-device-speech-emotion-recognition-2411.09339"/></url>
<url><loc>https://scifaro.com/en/abs/paralbench-a-large-scale-benchmark-for-computational-paralinguistics-over-acoustic-foundation-models-2411.09349</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paralbench-a-large-scale-benchmark-for-computational-paralinguistics-over-acoustic-foundation-models-2411.09349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paralbench-a-large-scale-benchmark-for-computational-paralinguistics-over-acoustic-foundation-models-2411.09349"/></url>
<url><loc>https://scifaro.com/en/abs/local-deployment-of-large-scale-music-ai-models-on-commodity-hardware-2411.09625</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-deployment-of-large-scale-music-ai-models-on-commodity-hardware-2411.09625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-deployment-of-large-scale-music-ai-models-on-commodity-hardware-2411.09625"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-voice-conversion-with-diffusion-transformers-2411.09943</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-with-diffusion-transformers-2411.09943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-voice-conversion-with-diffusion-transformers-2411.09943"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-and-spectrum-aware-singing-quality-assessment-with-bias-correction-and-model-fusion-2411.11123</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-and-spectrum-aware-singing-quality-assessment-with-bias-correction-and-model-fusion-2411.11123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-and-spectrum-aware-singing-quality-assessment-with-bias-correction-and-model-fusion-2411.11123"/></url>
<url><loc>https://scifaro.com/en/abs/samos-a-neural-mos-prediction-model-leveraging-semantic-representations-and-acoustic-features-2411.11232</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samos-a-neural-mos-prediction-model-leveraging-semantic-representations-and-acoustic-features-2411.11232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samos-a-neural-mos-prediction-model-leveraging-semantic-representations-and-acoustic-features-2411.11232"/></url>
<url><loc>https://scifaro.com/en/abs/estvocoder-an-excitation-spectral-transformed-neural-vocoder-conditioned-on-mel-spectrogram-2411.11258</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estvocoder-an-excitation-spectral-transformed-neural-vocoder-conditioned-on-mel-spectrogram-2411.11258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estvocoder-an-excitation-spectral-transformed-neural-vocoder-conditioned-on-mel-spectrogram-2411.11258"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-the-performance-of-ceemdan-in-underdetermined-speech-separation-2411.11312</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-the-performance-of-ceemdan-in-underdetermined-speech-separation-2411.11312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-the-performance-of-ceemdan-in-underdetermined-speech-separation-2411.11312"/></url>
<url><loc>https://scifaro.com/en/abs/using-voice-analysis-as-an-early-indicator-of-risk-for-depression-in-young-adults-2411.11541</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-voice-analysis-as-an-early-indicator-of-risk-for-depression-in-young-adults-2411.11541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-voice-analysis-as-an-early-indicator-of-risk-for-depression-in-young-adults-2411.11541"/></url>
<url><loc>https://scifaro.com/en/abs/do-captioning-metrics-reflect-music-semantic-alignment-2411.11692</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-captioning-metrics-reflect-music-semantic-alignment-2411.11692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-captioning-metrics-reflect-music-semantic-alignment-2411.11692"/></url>
<url><loc>https://scifaro.com/en/abs/compression-of-higher-order-ambisonics-with-multichannel-rvqgan-2411.12008</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compression-of-higher-order-ambisonics-with-multichannel-rvqgan-2411.12008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compression-of-higher-order-ambisonics-with-multichannel-rvqgan-2411.12008"/></url>
<url><loc>https://scifaro.com/en/abs/vision-language-models-are-few-shot-audio-spectrogram-classifiers-2411.12058</loc><lastmod>2024-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vision-language-models-are-few-shot-audio-spectrogram-classifiers-2411.12058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vision-language-models-are-few-shot-audio-spectrogram-classifiers-2411.12058"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-crate-digging-dj-tool-retrieval-using-speech-activity-music-structure-and-clap-embeddings-2411.12209</loc><lastmod>2024-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-crate-digging-dj-tool-retrieval-using-speech-activity-music-structure-and-clap-embeddings-2411.12209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-crate-digging-dj-tool-retrieval-using-speech-activity-music-structure-and-clap-embeddings-2411.12209"/></url>
<url><loc>https://scifaro.com/en/abs/dgsna-dynamic-generative-scene-based-noise-addition-method-2411.12363</loc><lastmod>2026-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dgsna-dynamic-generative-scene-based-noise-addition-method-2411.12363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dgsna-dynamic-generative-scene-based-noise-addition-method-2411.12363"/></url>
<url><loc>https://scifaro.com/en/abs/improving-controllability-and-editability-for-pretrained-text-to-music-generation-models-2411.12641</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-controllability-and-editability-for-pretrained-text-to-music-generation-models-2411.12641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-controllability-and-editability-for-pretrained-text-to-music-generation-models-2411.12641"/></url>
<url><loc>https://scifaro.com/en/abs/sonnet-enhancing-time-delay-estimation-by-leveraging-simulated-audio-2411.13179</loc><lastmod>2024-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonnet-enhancing-time-delay-estimation-by-leveraging-simulated-audio-2411.13179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonnet-enhancing-time-delay-estimation-by-leveraging-simulated-audio-2411.13179"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-audio-feature-extraction-for-real-time-talking-portrait-synthesis-2411.13209</loc><lastmod>2024-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-audio-feature-extraction-for-real-time-talking-portrait-synthesis-2411.13209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-audio-feature-extraction-for-real-time-talking-portrait-synthesis-2411.13209"/></url>
<url><loc>https://scifaro.com/en/abs/i2tts-image-indicated-immersive-text-to-speech-synthesis-with-spatial-perception-2411.13314</loc><lastmod>2025-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i2tts-image-indicated-immersive-text-to-speech-synthesis-with-spatial-perception-2411.13314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i2tts-image-indicated-immersive-text-to-speech-synthesis-with-spatial-perception-2411.13314"/></url>
<url><loc>https://scifaro.com/en/abs/cafe-a-novel-code-switching-dataset-for-algerian-dialect-french-and-english-2411.13424</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cafe-a-novel-code-switching-dataset-for-algerian-dialect-french-and-english-2411.13424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cafe-a-novel-code-switching-dataset-for-algerian-dialect-french-and-english-2411.13424"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-speech-analysis-and-correction-tool-for-arabic-speaking-children-2411.13592</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-speech-analysis-and-correction-tool-for-arabic-speaking-children-2411.13592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-speech-analysis-and-correction-tool-for-arabic-speaking-children-2411.13592"/></url>
<url><loc>https://scifaro.com/en/abs/tiny-align-bridging-automatic-speech-recognition-and-large-language-model-on-the-edge-2411.13766</loc><lastmod>2025-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiny-align-bridging-automatic-speech-recognition-and-large-language-model-on-the-edge-2411.13766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiny-align-bridging-automatic-speech-recognition-and-large-language-model-on-the-edge-2411.13766"/></url>
<url><loc>https://scifaro.com/en/abs/x-crossnet-a-complex-spectral-mapping-approach-to-target-speaker-extraction-with-cross-attention-speaker-embedding-fusion-2411.13811</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-crossnet-a-complex-spectral-mapping-approach-to-target-speaker-extraction-with-cross-attention-speaker-embedding-fusion-2411.13811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-crossnet-a-complex-spectral-mapping-approach-to-target-speaker-extraction-with-cross-attention-speaker-embedding-fusion-2411.13811"/></url>
<url><loc>https://scifaro.com/en/abs/harp-a-large-scale-higher-order-ambisonic-room-impulse-response-dataset-2411.14207</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harp-a-large-scale-higher-order-ambisonic-room-impulse-response-dataset-2411.14207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harp-a-large-scale-higher-order-ambisonic-room-impulse-response-dataset-2411.14207"/></url>
<url><loc>https://scifaro.com/en/abs/attention-guided-spectrogram-sequence-modeling-with-cnns-for-music-genre-classification-2411.14474</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-guided-spectrogram-sequence-modeling-with-cnns-for-music-genre-classification-2411.14474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-guided-spectrogram-sequence-modeling-with-cnns-for-music-genre-classification-2411.14474"/></url>
<url><loc>https://scifaro.com/en/abs/listening-for-expert-identified-linguistic-features-assessment-of-audio-deepfake-discernment-among-undergraduate-students-2411.14586</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-for-expert-identified-linguistic-features-assessment-of-audio-deepfake-discernment-among-undergraduate-students-2411.14586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-for-expert-identified-linguistic-features-assessment-of-audio-deepfake-discernment-among-undergraduate-students-2411.14586"/></url>
<url><loc>https://scifaro.com/en/abs/generative-ai-for-music-and-audio-2411.14627</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-ai-for-music-and-audio-2411.14627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-ai-for-music-and-audio-2411.14627"/></url>
<url><loc>https://scifaro.com/en/abs/mode-conditioned-music-learning-and-composition-a-spiking-neural-network-inspired-by-neuroscience-and-psychology-2411.14773</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mode-conditioned-music-learning-and-composition-a-spiking-neural-network-inspired-by-neuroscience-and-psychology-2411.14773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mode-conditioned-music-learning-and-composition-a-spiking-neural-network-inspired-by-neuroscience-and-psychology-2411.14773"/></url>
<url><loc>https://scifaro.com/en/abs/who-can-withstand-chat-audio-attacks-an-evaluation-benchmark-for-large-audio-language-models-2411.14842</loc><lastmod>2025-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-can-withstand-chat-audio-attacks-an-evaluation-benchmark-for-large-audio-language-models-2411.14842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-can-withstand-chat-audio-attacks-an-evaluation-benchmark-for-large-audio-language-models-2411.14842"/></url>
<url><loc>https://scifaro.com/en/abs/dairhum-a-platform-for-directly-aligning-ai-representations-with-human-musical-judgments-applied-to-carnatic-music-2411.14907</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dairhum-a-platform-for-directly-aligning-ai-representations-with-human-musical-judgments-applied-to-carnatic-music-2411.14907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dairhum-a-platform-for-directly-aligning-ai-representations-with-human-musical-judgments-applied-to-carnatic-music-2411.14907"/></url>
<url><loc>https://scifaro.com/en/abs/towards-speaker-identification-with-minimal-dataset-and-constrained-resources-using-1d-convolution-neural-network-2411.15082</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-speaker-identification-with-minimal-dataset-and-constrained-resources-using-1d-convolution-neural-network-2411.15082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-speaker-identification-with-minimal-dataset-and-constrained-resources-using-1d-convolution-neural-network-2411.15082"/></url>
<url><loc>https://scifaro.com/en/abs/hindi-audio-video-deepfake-hav-df-a-hindi-language-based-audio-video-deepfake-dataset-2411.15457</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hindi-audio-video-deepfake-hav-df-a-hindi-language-based-audio-video-deepfake-dataset-2411.15457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hindi-audio-video-deepfake-hav-df-a-hindi-language-based-audio-video-deepfake-dataset-2411.15457"/></url>
<url><loc>https://scifaro.com/en/abs/repurposing-image-diffusion-models-for-training-free-music-style-transfer-on-mel-spectrograms-2411.15913</loc><lastmod>2026-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repurposing-image-diffusion-models-for-training-free-music-style-transfer-on-mel-spectrograms-2411.15913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repurposing-image-diffusion-models-for-training-free-music-style-transfer-on-mel-spectrograms-2411.15913"/></url>
<url><loc>https://scifaro.com/en/abs/qr-vc-leveraging-quantization-residuals-for-linear-disentanglement-in-zero-shot-voice-conversion-2411.16147</loc><lastmod>2025-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qr-vc-leveraging-quantization-residuals-for-linear-disentanglement-in-zero-shot-voice-conversion-2411.16147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qr-vc-leveraging-quantization-residuals-for-linear-disentanglement-in-zero-shot-voice-conversion-2411.16147"/></url>
<url><loc>https://scifaro.com/en/abs/the-svasr-system-for-text-dependent-speaker-verification-tdsv-aaic-challenge-2024-2411.16276</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-svasr-system-for-text-dependent-speaker-verification-tdsv-aaic-challenge-2024-2411.16276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-svasr-system-for-text-dependent-speaker-verification-tdsv-aaic-challenge-2024-2411.16276"/></url>
<url><loc>https://scifaro.com/en/abs/dim-gestor-co-speech-gesture-generation-with-adaptive-layer-normalization-mamba-2-2411.16729</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dim-gestor-co-speech-gesture-generation-with-adaptive-layer-normalization-mamba-2-2411.16729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dim-gestor-co-speech-gesture-generation-with-adaptive-layer-normalization-mamba-2-2411.16729"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-asr-methods-for-speech-deepfake-detection-2411.17349</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-asr-methods-for-speech-deepfake-detection-2411.17349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-asr-methods-for-speech-deepfake-detection-2411.17349"/></url>
<url><loc>https://scifaro.com/en/abs/music2fail-transfer-music-to-failed-recorder-style-2411.18075</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music2fail-transfer-music-to-failed-recorder-style-2411.18075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music2fail-transfer-music-to-failed-recorder-style-2411.18075"/></url>
<url><loc>https://scifaro.com/en/abs/fusion-of-discrete-representations-and-self-augmented-representations-for-multilingual-automatic-speech-recognition-2411.18107</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusion-of-discrete-representations-and-self-augmented-representations-for-multilingual-automatic-speech-recognition-2411.18107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusion-of-discrete-representations-and-self-augmented-representations-for-multilingual-automatic-speech-recognition-2411.18107"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-learn-a-new-language-an-efficient-solution-for-self-supervised-learning-models-unseen-languages-adaption-in-low-resource-scenario-2411.18217</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-learn-a-new-language-an-efficient-solution-for-self-supervised-learning-models-unseen-languages-adaption-in-low-resource-scenario-2411.18217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-learn-a-new-language-an-efficient-solution-for-self-supervised-learning-models-unseen-languages-adaption-in-low-resource-scenario-2411.18217"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-choice-learning-for-efficient-speech-separation-with-many-speakers-2411.18497</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-choice-learning-for-efficient-speech-separation-with-many-speakers-2411.18497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-choice-learning-for-efficient-speech-separation-with-many-speakers-2411.18497"/></url>
<url><loc>https://scifaro.com/en/abs/towards-advanced-speech-signal-processing-a-statistical-perspective-on-convolution-based-architectures-and-its-applications-2411.18636</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-advanced-speech-signal-processing-a-statistical-perspective-on-convolution-based-architectures-and-its-applications-2411.18636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-advanced-speech-signal-processing-a-statistical-perspective-on-convolution-based-architectures-and-its-applications-2411.18636"/></url>
<url><loc>https://scifaro.com/en/abs/codiff-vc-a-codec-assisted-diffusion-model-for-zero-shot-voice-conversion-2411.18918</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codiff-vc-a-codec-assisted-diffusion-model-for-zero-shot-voice-conversion-2411.18918"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codiff-vc-a-codec-assisted-diffusion-model-for-zero-shot-voice-conversion-2411.18918"/></url>
<url><loc>https://scifaro.com/en/abs/a-voice-based-triage-for-type-2-diabetes-using-a-conversational-virtual-assistant-in-the-home-environment-2411.19204</loc><lastmod>2025-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-voice-based-triage-for-type-2-diabetes-using-a-conversational-virtual-assistant-in-the-home-environment-2411.19204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-voice-based-triage-for-type-2-diabetes-using-a-conversational-virtual-assistant-in-the-home-environment-2411.19204"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-transfer-learning-for-music-foundation-models-2411.19371</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-for-music-foundation-models-2411.19371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-for-music-foundation-models-2411.19371"/></url>
<url><loc>https://scifaro.com/en/abs/memristive-nanowire-network-for-energy-efficient-audio-classification-pre-processing-free-reservoir-computing-with-reduced-latency-2411.19611</loc><lastmod>2025-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memristive-nanowire-network-for-energy-efficient-audio-classification-pre-processing-free-reservoir-computing-with-reduced-latency-2411.19611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memristive-nanowire-network-for-energy-efficient-audio-classification-pre-processing-free-reservoir-computing-with-reduced-latency-2411.19611"/></url>
<url><loc>https://scifaro.com/en/abs/noro-noise-robust-one-shot-voice-conversion-with-hidden-speaker-representation-learning-2411.19770</loc><lastmod>2025-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noro-noise-robust-one-shot-voice-conversion-with-hidden-speaker-representation-learning-2411.19770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noro-noise-robust-one-shot-voice-conversion-with-hidden-speaker-representation-learning-2411.19770"/></url>
<url><loc>https://scifaro.com/en/abs/voice-communication-analysis-in-esports-2411.19793</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-communication-analysis-in-esports-2411.19793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-communication-analysis-in-esports-2411.19793"/></url>
<url><loc>https://scifaro.com/en/abs/a-cross-corpus-speech-emotion-recognition-method-based-on-supervised-contrastive-learning-2411.19803</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cross-corpus-speech-emotion-recognition-method-based-on-supervised-contrastive-learning-2411.19803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cross-corpus-speech-emotion-recognition-method-based-on-supervised-contrastive-learning-2411.19803"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-musical-stem-retrieval-with-joint-embedding-predictive-architectures-2411.19806</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-musical-stem-retrieval-with-joint-embedding-predictive-architectures-2411.19806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-musical-stem-retrieval-with-joint-embedding-predictive-architectures-2411.19806"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-stacked-aggregated-network-for-voice-authentication-in-iot-enabled-smart-devices-2411.19841</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-stacked-aggregated-network-for-voice-authentication-in-iot-enabled-smart-devices-2411.19841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-stacked-aggregated-network-for-voice-authentication-in-iot-enabled-smart-devices-2411.19841"/></url>
<url><loc>https://scifaro.com/en/abs/musical-composition-and-2d-cellular-automata-based-on-music-intervals-2411.19844</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-composition-and-2d-cellular-automata-based-on-music-intervals-2411.19844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-composition-and-2d-cellular-automata-based-on-music-intervals-2411.19844"/></url>
<url><loc>https://scifaro.com/en/abs/raw-audio-classification-with-cosine-convolutional-neural-network-coscovnn-2412.00312</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raw-audio-classification-with-cosine-convolutional-neural-network-coscovnn-2412.00312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raw-audio-classification-with-cosine-convolutional-neural-network-coscovnn-2412.00312"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-verification-robustness-with-synthetic-emotional-utterances-2412.00319</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-verification-robustness-with-synthetic-emotional-utterances-2412.00319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-verification-robustness-with-synthetic-emotional-utterances-2412.00319"/></url>
<url><loc>https://scifaro.com/en/abs/musicgen-chord-advancing-music-generation-through-chord-progressions-and-interactive-web-ui-2412.00325</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicgen-chord-advancing-music-generation-through-chord-progressions-and-interactive-web-ui-2412.00325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicgen-chord-advancing-music-generation-through-chord-progressions-and-interactive-web-ui-2412.00325"/></url>
<url><loc>https://scifaro.com/en/abs/sample-adaptive-data-augmentation-with-progressive-scheduling-2412.00415</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-adaptive-data-augmentation-with-progressive-scheduling-2412.00415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-adaptive-data-augmentation-with-progressive-scheduling-2412.00415"/></url>
<url><loc>https://scifaro.com/en/abs/personal-sound-zones-and-shielded-localized-communication-through-active-acoustic-control-2412.00456</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personal-sound-zones-and-shielded-localized-communication-through-active-acoustic-control-2412.00456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personal-sound-zones-and-shielded-localized-communication-through-active-acoustic-control-2412.00456"/></url>
<url><loc>https://scifaro.com/en/abs/from-audio-deepfake-detection-to-ai-generated-music-detection-a-pathway-and-overview-2412.00571</loc><lastmod>2024-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-audio-deepfake-detection-to-ai-generated-music-detection-a-pathway-and-overview-2412.00571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-audio-deepfake-detection-to-ai-generated-music-detection-a-pathway-and-overview-2412.00571"/></url>
<url><loc>https://scifaro.com/en/abs/audio-atlas-visualizing-and-exploring-audio-datasets-2412.00591</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-atlas-visualizing-and-exploring-audio-datasets-2412.00591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-atlas-visualizing-and-exploring-audio-datasets-2412.00591"/></url>
<url><loc>https://scifaro.com/en/abs/complexity-boosted-adaptive-training-for-better-low-resource-asr-performance-2412.00877</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complexity-boosted-adaptive-training-for-better-low-resource-asr-performance-2412.00877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complexity-boosted-adaptive-training-for-better-low-resource-asr-performance-2412.00877"/></url>
<url><loc>https://scifaro.com/en/abs/freecodec-a-disentangled-neural-speech-codec-with-fewer-tokens-2412.01053</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freecodec-a-disentangled-neural-speech-codec-with-fewer-tokens-2412.01053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freecodec-a-disentangled-neural-speech-codec-with-fewer-tokens-2412.01053"/></url>
<url><loc>https://scifaro.com/en/abs/the-codec-language-model-based-zero-shot-spontaneous-style-tts-system-for-covoc-challenge-2024-2412.01100</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-codec-language-model-based-zero-shot-spontaneous-style-tts-system-for-covoc-challenge-2024-2412.01100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-codec-language-model-based-zero-shot-spontaneous-style-tts-system-for-covoc-challenge-2024-2412.01100"/></url>
<url><loc>https://scifaro.com/en/abs/reject-threshold-adaptation-for-open-set-model-attribution-of-deepfake-audio-2412.01425</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reject-threshold-adaptation-for-open-set-model-attribution-of-deepfake-audio-2412.01425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reject-threshold-adaptation-for-open-set-model-attribution-of-deepfake-audio-2412.01425"/></url>
<url><loc>https://scifaro.com/en/abs/generative-ai-based-data-augmentation-for-improved-bioacoustic-classification-in-noisy-environments-2412.01530</loc><lastmod>2025-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-ai-based-data-augmentation-for-improved-bioacoustic-classification-in-noisy-environments-2412.01530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-ai-based-data-augmentation-for-improved-bioacoustic-classification-in-noisy-environments-2412.01530"/></url>
<url><loc>https://scifaro.com/en/abs/it-takes-two-real-time-co-speech-two-person-s-interaction-generation-via-reactive-auto-regressive-diffusion-model-2412.02419</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/it-takes-two-real-time-co-speech-two-person-s-interaction-generation-via-reactive-auto-regressive-diffusion-model-2412.02419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/it-takes-two-real-time-co-speech-two-person-s-interaction-generation-via-reactive-auto-regressive-diffusion-model-2412.02419"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-abnormal-heart-sound-using-mobile-phones-and-on-device-iconnet-2412.03267</loc><lastmod>2024-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-abnormal-heart-sound-using-mobile-phones-and-on-device-iconnet-2412.03267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-abnormal-heart-sound-using-mobile-phones-and-on-device-iconnet-2412.03267"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-trends-in-audio-mixes-and-masters-insights-from-a-dataset-analysis-2412.03373</loc><lastmod>2024-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-trends-in-audio-mixes-and-masters-insights-from-a-dataset-analysis-2412.03373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-trends-in-audio-mixes-and-masters-insights-from-a-dataset-analysis-2412.03373"/></url>
<url><loc>https://scifaro.com/en/abs/diffstyletts-diffusion-based-hierarchical-prosody-modeling-for-text-to-speech-with-diverse-and-controllable-styles-2412.03388</loc><lastmod>2024-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffstyletts-diffusion-based-hierarchical-prosody-modeling-for-text-to-speech-with-diverse-and-controllable-styles-2412.03388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffstyletts-diffusion-based-hierarchical-prosody-modeling-for-text-to-speech-with-diverse-and-controllable-styles-2412.03388"/></url>
<url><loc>https://scifaro.com/en/abs/nbm-an-open-dataset-for-the-acoustic-monitoring-of-nocturnal-migratory-birds-in-europe-2412.03633</loc><lastmod>2025-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nbm-an-open-dataset-for-the-acoustic-monitoring-of-nocturnal-migratory-birds-in-europe-2412.03633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nbm-an-open-dataset-for-the-acoustic-monitoring-of-nocturnal-migratory-birds-in-europe-2412.03633"/></url>
<url><loc>https://scifaro.com/en/abs/embedding-space-diffusion-for-zero-shot-environmental-sound-classification-2412.03771</loc><lastmod>2025-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedding-space-diffusion-for-zero-shot-environmental-sound-classification-2412.03771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedding-space-diffusion-for-zero-shot-environmental-sound-classification-2412.03771"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-based-feature-extraction-for-enhanced-automatic-severity-classification-in-dysarthric-speech-2412.03784</loc><lastmod>2024-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-based-feature-extraction-for-enhanced-automatic-severity-classification-in-dysarthric-speech-2412.03784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-based-feature-extraction-for-enhanced-automatic-severity-classification-in-dysarthric-speech-2412.03784"/></url>
<url><loc>https://scifaro.com/en/abs/missing-melodies-ai-music-generation-and-its-nearly-complete-omission-of-the-global-south-2412.04100</loc><lastmod>2025-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/missing-melodies-ai-music-generation-and-its-nearly-complete-omission-of-the-global-south-2412.04100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/missing-melodies-ai-music-generation-and-its-nearly-complete-omission-of-the-global-south-2412.04100"/></url>
<url><loc>https://scifaro.com/en/abs/relationships-between-keywords-and-strong-beats-in-lyrical-music-2412.04202</loc><lastmod>2025-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relationships-between-keywords-and-strong-beats-in-lyrical-music-2412.04202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relationships-between-keywords-and-strong-beats-in-lyrical-music-2412.04202"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-nonuniform-energy-decay-through-the-modal-decomposition-of-acoustic-radiance-transfer-mod-art-2412.04534</loc><lastmod>2025-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-nonuniform-energy-decay-through-the-modal-decomposition-of-acoustic-radiance-transfer-mod-art-2412.04534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-nonuniform-energy-decay-through-the-modal-decomposition-of-acoustic-radiance-transfer-mod-art-2412.04534"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-transformer-based-music-overpainting-for-jazz-piano-variations-2412.04610</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-transformer-based-music-overpainting-for-jazz-piano-variations-2412.04610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-transformer-based-music-overpainting-for-jazz-piano-variations-2412.04610"/></url>
<url><loc>https://scifaro.com/en/abs/diff4steer-steerable-diffusion-prior-for-generative-music-retrieval-with-semantic-guidance-2412.04746</loc><lastmod>2025-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff4steer-steerable-diffusion-prior-for-generative-music-retrieval-with-semantic-guidance-2412.04746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff4steer-steerable-diffusion-prior-for-generative-music-retrieval-with-semantic-guidance-2412.04746"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-tokens-makes-llms-robust-multi-modality-learners-2412.04917</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-tokens-makes-llms-robust-multi-modality-learners-2412.04917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-tokens-makes-llms-robust-multi-modality-learners-2412.04917"/></url>
<url><loc>https://scifaro.com/en/abs/applying-automatic-differentiation-to-optimize-differential-microphone-array-designs-2412.05123</loc><lastmod>2024-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-automatic-differentiation-to-optimize-differential-microphone-array-designs-2412.05123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-automatic-differentiation-to-optimize-differential-microphone-array-designs-2412.05123"/></url>
<url><loc>https://scifaro.com/en/abs/pyampact-a-score-audio-alignment-toolkit-for-performance-data-estimation-and-multi-modal-processing-2412.05436</loc><lastmod>2026-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pyampact-a-score-audio-alignment-toolkit-for-performance-data-estimation-and-multi-modal-processing-2412.05436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pyampact-a-score-audio-alignment-toolkit-for-performance-data-estimation-and-multi-modal-processing-2412.05436"/></url>
<url><loc>https://scifaro.com/en/abs/wavfusion-towards-wav2vec-2-0-multimodal-speech-emotion-recognition-2412.05558</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavfusion-towards-wav2vec-2-0-multimodal-speech-emotion-recognition-2412.05558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavfusion-towards-wav2vec-2-0-multimodal-speech-emotion-recognition-2412.05558"/></url>
<url><loc>https://scifaro.com/en/abs/when-vision-models-meet-parameter-efficient-look-aside-adapters-without-large-scale-audio-pretraining-2412.05951</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/when-vision-models-meet-parameter-efficient-look-aside-adapters-without-large-scale-audio-pretraining-2412.05951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/when-vision-models-meet-parameter-efficient-look-aside-adapters-without-large-scale-audio-pretraining-2412.05951"/></url>
<url><loc>https://scifaro.com/en/abs/m6-multi-generator-multi-domain-multi-lingual-and-cultural-multi-genres-multi-instrument-machine-generated-music-detection-databases-2412.06001</loc><lastmod>2026-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m6-multi-generator-multi-domain-multi-lingual-and-cultural-multi-genres-multi-instrument-machine-generated-music-detection-databases-2412.06001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m6-multi-generator-multi-domain-multi-lingual-and-cultural-multi-genres-multi-instrument-machine-generated-music-detection-databases-2412.06001"/></url>
<url><loc>https://scifaro.com/en/abs/pilot-guided-multimodal-semantic-communication-for-audio-visual-event-localization-2412.06208</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pilot-guided-multimodal-semantic-communication-for-audio-visual-event-localization-2412.06208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pilot-guided-multimodal-semantic-communication-for-audio-visual-event-localization-2412.06208"/></url>
<url><loc>https://scifaro.com/en/abs/vidmusician-video-to-music-generation-with-semantic-rhythmic-alignment-via-hierarchical-visual-features-2412.06296</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vidmusician-video-to-music-generation-with-semantic-rhythmic-alignment-via-hierarchical-visual-features-2412.06296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vidmusician-video-to-music-generation-with-semantic-rhythmic-alignment-via-hierarchical-visual-features-2412.06296"/></url>
<url><loc>https://scifaro.com/en/abs/emospeech-a-corpus-of-emotionally-rich-and-contextually-detailed-speech-annotations-2412.06581</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emospeech-a-corpus-of-emotionally-rich-and-contextually-detailed-speech-annotations-2412.06581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emospeech-a-corpus-of-emotionally-rich-and-contextually-detailed-speech-annotations-2412.06581"/></url>
<url><loc>https://scifaro.com/en/abs/ai-trackmate-finally-someone-who-will-give-your-music-more-than-just-sounds-great-2412.06617</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-trackmate-finally-someone-who-will-give-your-music-more-than-just-sounds-great-2412.06617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-trackmate-finally-someone-who-will-give-your-music-more-than-just-sounds-great-2412.06617"/></url>
<url><loc>https://scifaro.com/en/abs/mumu-llama-multi-modal-music-understanding-and-generation-via-large-language-models-2412.06660</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mumu-llama-multi-modal-music-understanding-and-generation-via-large-language-models-2412.06660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mumu-llama-multi-modal-music-understanding-and-generation-via-large-language-models-2412.06660"/></url>
<url><loc>https://scifaro.com/en/abs/source-separation-automatic-transcription-for-music-2412.06703</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-separation-automatic-transcription-for-music-2412.06703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-separation-automatic-transcription-for-music-2412.06703"/></url>
<url><loc>https://scifaro.com/en/abs/improving-music-source-separation-with-diffusion-and-consistency-refinement-2412.06965</loc><lastmod>2026-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-music-source-separation-with-diffusion-and-consistency-refinement-2412.06965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-music-source-separation-with-diffusion-and-consistency-refinement-2412.06965"/></url>
<url><loc>https://scifaro.com/en/abs/preserving-speaker-information-in-direct-speech-to-speech-translation-with-non-autoregressive-generation-and-pretraining-2412.07316</loc><lastmod>2025-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preserving-speaker-information-in-direct-speech-to-speech-translation-with-non-autoregressive-generation-and-pretraining-2412.07316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preserving-speaker-information-in-direct-speech-to-speech-translation-with-non-autoregressive-generation-and-pretraining-2412.07316"/></url>
<url><loc>https://scifaro.com/en/abs/frechet-music-distance-a-metric-for-generative-symbolic-music-evaluation-2412.07948</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frechet-music-distance-a-metric-for-generative-symbolic-music-evaluation-2412.07948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frechet-music-distance-a-metric-for-generative-symbolic-music-evaluation-2412.07948"/></url>
<url><loc>https://scifaro.com/en/abs/aligner-guided-training-paradigm-advancing-text-to-speech-models-with-aligner-guided-duration-2412.08112</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aligner-guided-training-paradigm-advancing-text-to-speech-models-with-aligner-guided-duration-2412.08112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aligner-guided-training-paradigm-advancing-text-to-speech-models-with-aligner-guided-duration-2412.08112"/></url>
<url><loc>https://scifaro.com/en/abs/latentspeech-latent-diffusion-for-text-to-speech-generation-2412.08117</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latentspeech-latent-diffusion-for-text-to-speech-generation-2412.08117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latentspeech-latent-diffusion-for-text-to-speech-generation-2412.08117"/></url>
<url><loc>https://scifaro.com/en/abs/touchtts-an-embarrassingly-simple-tts-framework-that-everyone-can-touch-2412.08237</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/touchtts-an-embarrassingly-simple-tts-framework-that-everyone-can-touch-2412.08237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/touchtts-an-embarrassingly-simple-tts-framework-that-everyone-can-touch-2412.08237"/></url>
<url><loc>https://scifaro.com/en/abs/momuse-momentum-multi-modal-target-speaker-extraction-for-real-time-scenarios-with-impaired-visual-cues-2412.08247</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/momuse-momentum-multi-modal-target-speaker-extraction-for-real-time-scenarios-with-impaired-visual-cues-2412.08247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/momuse-momentum-multi-modal-target-speaker-extraction-for-real-time-scenarios-with-impaired-visual-cues-2412.08247"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-model-for-voice-and-accent-conversion-in-speech-and-singing-using-self-supervised-learning-and-feature-extraction-2412.08312</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-model-for-voice-and-accent-conversion-in-speech-and-singing-using-self-supervised-learning-and-feature-extraction-2412.08312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-model-for-voice-and-accent-conversion-in-speech-and-singing-using-self-supervised-learning-and-feature-extraction-2412.08312"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-mono-to-binaural-speech-synthesis-2412.08356</loc><lastmod>2025-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-mono-to-binaural-speech-synthesis-2412.08356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-mono-to-binaural-speech-synthesis-2412.08356"/></url>
<url><loc>https://scifaro.com/en/abs/pointtalk-audio-driven-dynamic-lip-point-cloud-for-3d-gaussian-based-talking-head-synthesis-2412.08504</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pointtalk-audio-driven-dynamic-lip-point-cloud-for-3d-gaussian-based-talking-head-synthesis-2412.08504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pointtalk-audio-driven-dynamic-lip-point-cloud-for-3d-gaussian-based-talking-head-synthesis-2412.08504"/></url>
<url><loc>https://scifaro.com/en/abs/sketch2sound-controllable-audio-generation-via-time-varying-signals-and-sonic-imitations-2412.08550</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sketch2sound-controllable-audio-generation-via-time-varying-signals-and-sonic-imitations-2412.08550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sketch2sound-controllable-audio-generation-via-time-varying-signals-and-sonic-imitations-2412.08550"/></url>
<url><loc>https://scifaro.com/en/abs/mel-refine-a-plug-and-play-approach-to-refine-mel-spectrogram-in-audio-generation-2412.08577</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-refine-a-plug-and-play-approach-to-refine-mel-spectrogram-in-audio-generation-2412.08577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-refine-a-plug-and-play-approach-to-refine-mel-spectrogram-in-audio-generation-2412.08577"/></url>
<url><loc>https://scifaro.com/en/abs/advwave-stealthy-adversarial-jailbreak-attack-against-large-audio-language-models-2412.08608</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advwave-stealthy-adversarial-jailbreak-attack-against-large-audio-language-models-2412.08608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advwave-stealthy-adversarial-jailbreak-attack-against-large-audio-language-models-2412.08608"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-vietnamese-speech-based-depression-diagnosis-using-dynamic-attention-mechanism-2412.08683</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-vietnamese-speech-based-depression-diagnosis-using-dynamic-attention-mechanism-2412.08683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-vietnamese-speech-based-depression-diagnosis-using-dynamic-attention-mechanism-2412.08683"/></url>
<url><loc>https://scifaro.com/en/abs/complex-cycle-consistent-diffusion-model-for-monaural-speech-enhancement-2412.08856</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-cycle-consistent-diffusion-model-for-monaural-speech-enhancement-2412.08856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-cycle-consistent-diffusion-model-for-monaural-speech-enhancement-2412.08856"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-graphic-notation-with-musicldm-an-ai-improvisation-of-cornelius-cardew-s-treatise-2412.08944</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-graphic-notation-with-musicldm-an-ai-improvisation-of-cornelius-cardew-s-treatise-2412.08944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-graphic-notation-with-musicldm-an-ai-improvisation-of-cornelius-cardew-s-treatise-2412.08944"/></url>
<url><loc>https://scifaro.com/en/abs/emodubber-towards-high-quality-and-emotion-controllable-movie-dubbing-2412.08988</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emodubber-towards-high-quality-and-emotion-controllable-movie-dubbing-2412.08988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emodubber-towards-high-quality-and-emotion-controllable-movie-dubbing-2412.08988"/></url>
<url><loc>https://scifaro.com/en/abs/speech-forensics-towards-comprehensive-synthetic-speech-dataset-establishment-and-analysis-2412.09032</loc><lastmod>2025-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-forensics-towards-comprehensive-synthetic-speech-dataset-establishment-and-analysis-2412.09032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-forensics-towards-comprehensive-synthetic-speech-dataset-establishment-and-analysis-2412.09032"/></url>
<url><loc>https://scifaro.com/en/abs/yingsound-video-guided-sound-effects-generation-with-multi-modal-chain-of-thought-controls-2412.09168</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yingsound-video-guided-sound-effects-generation-with-multi-modal-chain-of-thought-controls-2412.09168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yingsound-video-guided-sound-effects-generation-with-multi-modal-chain-of-thought-controls-2412.09168"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-generation-and-removal-of-speaker-adversarial-perturbation-for-voice-privacy-protection-2412.09195</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-generation-and-removal-of-speaker-adversarial-perturbation-for-voice-privacy-protection-2412.09195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-generation-and-removal-of-speaker-adversarial-perturbation-for-voice-privacy-protection-2412.09195"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-sentiment-analysis-based-on-video-and-audio-inputs-2412.09317</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-sentiment-analysis-based-on-video-and-audio-inputs-2412.09317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-sentiment-analysis-based-on-video-and-audio-inputs-2412.09317"/></url>
<url><loc>https://scifaro.com/en/abs/audios-don-t-lie-multi-frequency-channel-attention-mechanism-for-audio-deepfake-detection-2412.09467</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audios-don-t-lie-multi-frequency-channel-attention-mechanism-for-audio-deepfake-detection-2412.09467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audios-don-t-lie-multi-frequency-channel-attention-mechanism-for-audio-deepfake-detection-2412.09467"/></url>
<url><loc>https://scifaro.com/en/abs/sila-signal-to-language-augmentation-for-enhanced-control-in-text-to-audio-generation-2412.09789</loc><lastmod>2024-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sila-signal-to-language-augmentation-for-enhanced-control-in-text-to-audio-generation-2412.09789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sila-signal-to-language-augmentation-for-enhanced-control-in-text-to-audio-generation-2412.09789"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-multimodal-methods-and-spontaneous-speech-for-alzheimer-s-disease-identification-2412.09928</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-multimodal-methods-and-spontaneous-speech-for-alzheimer-s-disease-identification-2412.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-multimodal-methods-and-spontaneous-speech-for-alzheimer-s-disease-identification-2412.09928"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-speech-emotion-recognition-with-efficient-channel-attention-guided-deep-cnn-bilstm-framework-2412.10011</loc><lastmod>2024-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-speech-emotion-recognition-with-efficient-channel-attention-guided-deep-cnn-bilstm-framework-2412.10011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-speech-emotion-recognition-with-efficient-channel-attention-guided-deep-cnn-bilstm-framework-2412.10011"/></url>
<url><loc>https://scifaro.com/en/abs/cosyvoice-2-scalable-streaming-speech-synthesis-with-large-language-models-2412.10117</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosyvoice-2-scalable-streaming-speech-synthesis-with-large-language-models-2412.10117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosyvoice-2-scalable-streaming-speech-synthesis-with-large-language-models-2412.10117"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-mel-frequency-cepstral-coefficients-and-wavelet-based-audio-signal-processing-for-emotion-detection-and-mental-health-assessment-in-spoken-speech-2412.10469</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-mel-frequency-cepstral-coefficients-and-wavelet-based-audio-signal-processing-for-emotion-detection-and-mental-health-assessment-in-spoken-speech-2412.10469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-mel-frequency-cepstral-coefficients-and-wavelet-based-audio-signal-processing-for-emotion-detection-and-mental-health-assessment-in-spoken-speech-2412.10469"/></url>
<url><loc>https://scifaro.com/en/abs/tipping-points-pulse-elasticity-and-tonal-tension-an-empirical-study-on-what-generates-tipping-points-2412.10481</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tipping-points-pulse-elasticity-and-tonal-tension-an-empirical-study-on-what-generates-tipping-points-2412.10481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tipping-points-pulse-elasticity-and-tonal-tension-an-empirical-study-on-what-generates-tipping-points-2412.10481"/></url>
<url><loc>https://scifaro.com/en/abs/hidden-echoes-survive-training-in-audio-to-audio-generative-instrument-models-2412.10649</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hidden-echoes-survive-training-in-audio-to-audio-generative-instrument-models-2412.10649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hidden-echoes-survive-training-in-audio-to-audio-generative-instrument-models-2412.10649"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-anomaly-detection-in-industrial-machines-using-deep-one-class-support-vector-data-description-2412.10792</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-anomaly-detection-in-industrial-machines-using-deep-one-class-support-vector-data-description-2412.10792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-anomaly-detection-in-industrial-machines-using-deep-one-class-support-vector-data-description-2412.10792"/></url>
<url><loc>https://scifaro.com/en/abs/robust-persian-digit-recognition-in-noisy-environments-using-hybrid-cnn-bigru-model-2412.10857</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-persian-digit-recognition-in-noisy-environments-using-hybrid-cnn-bigru-model-2412.10857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-persian-digit-recognition-in-noisy-environments-using-hybrid-cnn-bigru-model-2412.10857"/></url>
<url><loc>https://scifaro.com/en/abs/composers-evaluations-of-an-ai-music-tool-insights-for-human-centred-design-2412.10968</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composers-evaluations-of-an-ai-music-tool-insights-for-human-centred-design-2412.10968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composers-evaluations-of-an-ai-music-tool-insights-for-human-centred-design-2412.10968"/></url>
<url><loc>https://scifaro.com/en/abs/whisperflow-speech-foundation-models-in-real-time-2412.11272</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisperflow-speech-foundation-models-in-real-time-2412.11272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisperflow-speech-foundation-models-in-real-time-2412.11272"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-gpt-a-hybrid-representation-audio-large-language-model-2412.11449</loc><lastmod>2024-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-gpt-a-hybrid-representation-audio-large-language-model-2412.11449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-gpt-a-hybrid-representation-audio-large-language-model-2412.11449"/></url>
<url><loc>https://scifaro.com/en/abs/region-based-optimization-in-continual-learning-for-audio-deepfake-detection-2412.11551</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/region-based-optimization-in-continual-learning-for-audio-deepfake-detection-2412.11551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/region-based-optimization-in-continual-learning-for-audio-deepfake-detection-2412.11551"/></url>
<url><loc>https://scifaro.com/en/abs/does-it-chug-towards-a-data-driven-understanding-of-guitar-tone-description-2412.11769</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-it-chug-towards-a-data-driven-understanding-of-guitar-tone-description-2412.11769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-it-chug-towards-a-data-driven-understanding-of-guitar-tone-description-2412.11769"/></url>
<url><loc>https://scifaro.com/en/abs/audiocil-a-python-toolbox-for-audio-class-incremental-learning-with-multiple-scenes-2412.11907</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiocil-a-python-toolbox-for-audio-class-incremental-learning-with-multiple-scenes-2412.11907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiocil-a-python-toolbox-for-audio-class-incremental-learning-with-multiple-scenes-2412.11907"/></url>
<url><loc>https://scifaro.com/en/abs/autrainer-a-modular-and-extensible-deep-learning-toolkit-for-computer-audition-tasks-2412.11943</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autrainer-a-modular-and-extensible-deep-learning-toolkit-for-computer-audition-tasks-2412.11943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autrainer-a-modular-and-extensible-deep-learning-toolkit-for-computer-audition-tasks-2412.11943"/></url>
<url><loc>https://scifaro.com/en/abs/voice-biomarker-analysis-and-automated-severity-classification-of-dysarthric-speech-in-a-multilingual-context-2412.12111</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-biomarker-analysis-and-automated-severity-classification-of-dysarthric-speech-in-a-multilingual-context-2412.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-biomarker-analysis-and-automated-severity-classification-of-dysarthric-speech-in-a-multilingual-context-2412.12111"/></url>
<url><loc>https://scifaro.com/en/abs/sound-classification-of-four-insect-classes-2412.12395</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-classification-of-four-insect-classes-2412.12395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-classification-of-four-insect-classes-2412.12395"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-control-of-emotion-rendering-in-speech-synthesis-2412.12498</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-control-of-emotion-rendering-in-speech-synthesis-2412.12498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-control-of-emotion-rendering-in-speech-synthesis-2412.12498"/></url>
<url><loc>https://scifaro.com/en/abs/libri2vox-dataset-target-speaker-extraction-with-diverse-speaker-conditions-and-synthetic-data-2412.12512</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libri2vox-dataset-target-speaker-extraction-with-diverse-speaker-conditions-and-synthetic-data-2412.12512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libri2vox-dataset-target-speaker-extraction-with-diverse-speaker-conditions-and-synthetic-data-2412.12512"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-level-feature-discrepancies-a-key-to-detecting-sophisticated-speech-deepfakes-2412.12619</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-level-feature-discrepancies-a-key-to-detecting-sophisticated-speech-deepfakes-2412.12619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-level-feature-discrepancies-a-key-to-detecting-sophisticated-speech-deepfakes-2412.12619"/></url>
<url><loc>https://scifaro.com/en/abs/camel-cross-attention-enhanced-mixture-of-experts-and-language-bias-for-code-switching-speech-recognition-2412.12760</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/camel-cross-attention-enhanced-mixture-of-experts-and-language-bias-for-code-switching-speech-recognition-2412.12760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/camel-cross-attention-enhanced-mixture-of-experts-and-language-bias-for-code-switching-speech-recognition-2412.12760"/></url>
<url><loc>https://scifaro.com/en/abs/tame-temporal-audio-based-mamba-for-enhanced-drone-trajectory-estimation-and-classification-2412.13037</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tame-temporal-audio-based-mamba-for-enhanced-drone-trajectory-estimation-and-classification-2412.13037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tame-temporal-audio-based-mamba-for-enhanced-drone-trajectory-estimation-and-classification-2412.13037"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-speech-classification-ieee-signal-processing-cup-2022-challenge-2412.13279</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-speech-classification-ieee-signal-processing-cup-2022-challenge-2412.13279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-speech-classification-ieee-signal-processing-cup-2022-challenge-2412.13279"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-detection-of-machine-generated-music-and-early-systematic-evaluation-2412.13421</loc><lastmod>2026-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-detection-of-machine-generated-music-and-early-systematic-evaluation-2412.13421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-detection-of-machine-generated-music-and-early-systematic-evaluation-2412.13421"/></url>
<url><loc>https://scifaro.com/en/abs/savgbench-benchmarking-spatially-aligned-audio-video-generation-2412.13462</loc><lastmod>2026-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/savgbench-benchmarking-spatially-aligned-audio-video-generation-2412.13462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/savgbench-benchmarking-spatially-aligned-audio-video-generation-2412.13462"/></url>
<url><loc>https://scifaro.com/en/abs/tuning-music-education-ai-powered-personalization-in-learning-music-2412.13514</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tuning-music-education-ai-powered-personalization-in-learning-music-2412.13514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tuning-music-education-ai-powered-personalization-in-learning-music-2412.13514"/></url>
<url><loc>https://scifaro.com/en/abs/folai-synchronized-foley-sound-generation-with-semantic-and-temporal-alignment-2412.15023</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/folai-synchronized-foley-sound-generation-with-semantic-and-temporal-alignment-2412.15023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/folai-synchronized-foley-sound-generation-with-semantic-and-temporal-alignment-2412.15023"/></url>
<url><loc>https://scifaro.com/en/abs/early-dementia-detection-using-multiple-spontaneous-speech-prompts-the-process-challenge-2412.15230</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/early-dementia-detection-using-multiple-spontaneous-speech-prompts-the-process-challenge-2412.15230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/early-dementia-detection-using-multiple-spontaneous-speech-prompts-the-process-challenge-2412.15230"/></url>
<url><loc>https://scifaro.com/en/abs/music-genre-classification-ensemble-learning-with-subcomponents-level-attention-2412.15602</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-genre-classification-ensemble-learning-with-subcomponents-level-attention-2412.15602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-genre-classification-ensemble-learning-with-subcomponents-level-attention-2412.15602"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-voip-communications-through-llm-based-real-time-speech-reconstruction-and-call-prioritization-for-emergency-services-2412.16176</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-voip-communications-through-llm-based-real-time-speech-reconstruction-and-call-prioritization-for-emergency-services-2412.16176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-voip-communications-through-llm-based-real-time-speech-reconstruction-and-call-prioritization-for-emergency-services-2412.16176"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-poultry-vocalizations-natural-language-processing-and-transformer-models-for-semantic-and-emotional-analysis-2412.16182</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-poultry-vocalizations-natural-language-processing-and-transformer-models-for-semantic-and-emotional-analysis-2412.16182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-poultry-vocalizations-natural-language-processing-and-transformer-models-for-semantic-and-emotional-analysis-2412.16182"/></url>
<url><loc>https://scifaro.com/en/abs/a-classification-benchmark-for-artificial-intelligence-detection-of-laryngeal-cancer-from-patient-voice-2412.16267</loc><lastmod>2025-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-classification-benchmark-for-artificial-intelligence-detection-of-laryngeal-cancer-from-patient-voice-2412.16267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-classification-benchmark-for-artificial-intelligence-detection-of-laryngeal-cancer-from-patient-voice-2412.16267"/></url>
<url><loc>https://scifaro.com/en/abs/text2midi-generating-symbolic-music-from-captions-2412.16526</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text2midi-generating-symbolic-music-from-captions-2412.16526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text2midi-generating-symbolic-music-from-captions-2412.16526"/></url>
<url><loc>https://scifaro.com/en/abs/improving-lip-synchrony-in-direct-audio-visual-speech-to-speech-translation-2412.16530</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-lip-synchrony-in-direct-audio-visual-speech-to-speech-translation-2412.16530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-lip-synchrony-in-direct-audio-visual-speech-to-speech-translation-2412.16530"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-seunet-mamba-unet-for-monaural-speech-enhancement-2412.16626</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-seunet-mamba-unet-for-monaural-speech-enhancement-2412.16626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-seunet-mamba-unet-for-monaural-speech-enhancement-2412.16626"/></url>
<url><loc>https://scifaro.com/en/abs/soundloc3d-invisible-3d-sound-source-localization-and-classification-using-a-multimodal-rgb-d-acoustic-camera-2412.16861</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundloc3d-invisible-3d-sound-source-localization-and-classification-using-a-multimodal-rgb-d-acoustic-camera-2412.16861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundloc3d-invisible-3d-sound-source-localization-and-classification-using-a-multimodal-rgb-d-acoustic-camera-2412.16861"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-frequency-state-space-duality-an-efficient-paradigm-for-speech-emotion-recognition-2412.16904</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-frequency-state-space-duality-an-efficient-paradigm-for-speech-emotion-recognition-2412.16904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-frequency-state-space-duality-an-efficient-paradigm-for-speech-emotion-recognition-2412.16904"/></url>
<url><loc>https://scifaro.com/en/abs/av-dtec-self-supervised-audio-visual-fusion-for-drone-trajectory-estimation-and-classification-2412.16928</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-dtec-self-supervised-audio-visual-fusion-for-drone-trajectory-estimation-and-classification-2412.16928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-dtec-self-supervised-audio-visual-fusion-for-drone-trajectory-estimation-and-classification-2412.16928"/></url>
<url><loc>https://scifaro.com/en/abs/trainingless-adaptation-of-pretrained-models-for-environmental-sound-classification-2412.17212</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trainingless-adaptation-of-pretrained-models-for-environmental-sound-classification-2412.17212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trainingless-adaptation-of-pretrained-models-for-environmental-sound-classification-2412.17212"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-consistency-guided-test-time-adaptation-for-contrastive-audio-language-models-with-unlabeled-audio-2412.17306</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-consistency-guided-test-time-adaptation-for-contrastive-audio-language-models-with-unlabeled-audio-2412.17306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-consistency-guided-test-time-adaptation-for-contrastive-audio-language-models-with-unlabeled-audio-2412.17306"/></url>
<url><loc>https://scifaro.com/en/abs/versa-a-versatile-evaluation-toolkit-for-speech-audio-and-music-2412.17667</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/versa-a-versatile-evaluation-toolkit-for-speech-audio-and-music-2412.17667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/versa-a-versatile-evaluation-toolkit-for-speech-audio-and-music-2412.17667"/></url>
<url><loc>https://scifaro.com/en/abs/are-audio-deepfake-detection-models-polyglots-2412.17924</loc><lastmod>2025-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-audio-deepfake-detection-models-polyglots-2412.17924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-audio-deepfake-detection-models-polyglots-2412.17924"/></url>
<url><loc>https://scifaro.com/en/abs/lla-vap-lstm-ensemble-of-llama-and-vap-for-turn-taking-prediction-2412.18061</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lla-vap-lstm-ensemble-of-llama-and-vap-for-turn-taking-prediction-2412.18061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lla-vap-lstm-ensemble-of-llama-and-vap-for-turn-taking-prediction-2412.18061"/></url>
<url><loc>https://scifaro.com/en/abs/smooth-foley-creating-continuous-sound-for-video-to-audio-generation-under-semantic-guidance-2412.18157</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smooth-foley-creating-continuous-sound-for-video-to-audio-generation-under-semantic-guidance-2412.18157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smooth-foley-creating-continuous-sound-for-video-to-audio-generation-under-semantic-guidance-2412.18157"/></url>
<url><loc>https://scifaro.com/en/abs/explaining-speaker-and-spoof-embeddings-via-probing-2412.18191</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explaining-speaker-and-spoof-embeddings-via-probing-2412.18191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explaining-speaker-and-spoof-embeddings-via-probing-2412.18191"/></url>
<url><loc>https://scifaro.com/en/abs/u-mamba-net-a-highly-efficient-mamba-based-u-net-style-network-for-noisy-and-reverberant-speech-separation-2412.18217</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-mamba-net-a-highly-efficient-mamba-based-u-net-style-network-for-noisy-and-reverberant-speech-separation-2412.18217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-mamba-net-a-highly-efficient-mamba-based-u-net-style-network-for-noisy-and-reverberant-speech-separation-2412.18217"/></url>
<url><loc>https://scifaro.com/en/abs/simi-sfx-a-similarity-based-conditioning-method-for-controllable-sound-effect-synthesis-2412.18710</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simi-sfx-a-similarity-based-conditioning-method-for-controllable-sound-effect-synthesis-2412.18710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simi-sfx-a-similarity-based-conditioning-method-for-controllable-sound-effect-synthesis-2412.18710"/></url>
<url><loc>https://scifaro.com/en/abs/mri2speech-speech-synthesis-from-articulatory-movements-recorded-by-real-time-mri-2412.18836</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mri2speech-speech-synthesis-from-articulatory-movements-recorded-by-real-time-mri-2412.18836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mri2speech-speech-synthesis-from-articulatory-movements-recorded-by-real-time-mri-2412.18836"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-nam-to-speech-conversion-with-novel-methods-and-the-multinam-dataset-2412.18839</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-nam-to-speech-conversion-with-novel-methods-and-the-multinam-dataset-2412.18839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-nam-to-speech-conversion-with-novel-methods-and-the-multinam-dataset-2412.18839"/></url>
<url><loc>https://scifaro.com/en/abs/attention-enhanced-short-time-wiener-solution-for-acoustic-echo-cancellation-2412.18851</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-enhanced-short-time-wiener-solution-for-acoustic-echo-cancellation-2412.18851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-enhanced-short-time-wiener-solution-for-acoustic-echo-cancellation-2412.18851"/></url>
<url><loc>https://scifaro.com/en/abs/robust-target-speaker-direction-of-arrival-estimation-2412.18913</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-target-speaker-direction-of-arrival-estimation-2412.18913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-target-speaker-direction-of-arrival-estimation-2412.18913"/></url>
<url><loc>https://scifaro.com/en/abs/leave-one-equivariant-alleviating-invariance-related-information-loss-in-contrastive-music-representations-2412.18955</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leave-one-equivariant-alleviating-invariance-related-information-loss-in-contrastive-music-representations-2412.18955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leave-one-equivariant-alleviating-invariance-related-information-loss-in-contrastive-music-representations-2412.18955"/></url>
<url><loc>https://scifaro.com/en/abs/bsdb-net-band-split-dual-branch-network-with-selective-state-spaces-mechanism-for-monaural-speech-enhancement-2412.19099</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bsdb-net-band-split-dual-branch-network-with-selective-state-spaces-mechanism-for-monaural-speech-enhancement-2412.19099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bsdb-net-band-split-dual-branch-network-with-selective-state-spaces-mechanism-for-monaural-speech-enhancement-2412.19099"/></url>
<url><loc>https://scifaro.com/en/abs/cohedancers-enhancing-interactive-group-dance-generation-through-music-driven-coherence-decomposition-2412.19123</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cohedancers-enhancing-interactive-group-dance-generation-through-music-driven-coherence-decomposition-2412.19123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cohedancers-enhancing-interactive-group-dance-generation-through-music-driven-coherence-decomposition-2412.19123"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-dynamic-music-emotion-recognition-with-dual-scale-attention-based-meta-learning-2412.19200</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-dynamic-music-emotion-recognition-with-dual-scale-attention-based-meta-learning-2412.19200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-dynamic-music-emotion-recognition-with-dual-scale-attention-based-meta-learning-2412.19200"/></url>
<url><loc>https://scifaro.com/en/abs/improving-generalization-for-ai-synthesized-voice-detection-2412.19279</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-generalization-for-ai-synthesized-voice-detection-2412.19279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-generalization-for-ai-synthesized-voice-detection-2412.19279"/></url>
<url><loc>https://scifaro.com/en/abs/etta-elucidating-the-design-space-of-text-to-audio-models-2412.19351</loc><lastmod>2025-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/etta-elucidating-the-design-space-of-text-to-audio-models-2412.19351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/etta-elucidating-the-design-space-of-text-to-audio-models-2412.19351"/></url>
<url><loc>https://scifaro.com/en/abs/mouth-articulation-based-anchoring-for-improved-cross-corpus-speech-emotion-recognition-2412.19909</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mouth-articulation-based-anchoring-for-improved-cross-corpus-speech-emotion-recognition-2412.19909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mouth-articulation-based-anchoring-for-improved-cross-corpus-speech-emotion-recognition-2412.19909"/></url>
<url><loc>https://scifaro.com/en/abs/stable-tts-stable-speaker-adaptive-text-to-speech-synthesis-via-prosody-prompting-2412.20155</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stable-tts-stable-speaker-adaptive-text-to-speech-synthesis-via-prosody-prompting-2412.20155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stable-tts-stable-speaker-adaptive-text-to-speech-synthesis-via-prosody-prompting-2412.20155"/></url>
<url><loc>https://scifaro.com/en/abs/language-based-audio-retrieval-with-co-attention-networks-2412.20914</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-based-audio-retrieval-with-co-attention-networks-2412.20914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-based-audio-retrieval-with-co-attention-networks-2412.20914"/></url>
<url><loc>https://scifaro.com/en/abs/tangoflux-super-fast-and-faithful-text-to-audio-generation-with-flow-matching-and-clap-ranked-preference-optimization-2412.21037</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tangoflux-super-fast-and-faithful-text-to-audio-generation-with-flow-matching-and-clap-ranked-preference-optimization-2412.21037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tangoflux-super-fast-and-faithful-text-to-audio-generation-with-flow-matching-and-clap-ranked-preference-optimization-2412.21037"/></url>
<url><loc>https://scifaro.com/en/abs/secodec-structural-entropy-based-compressive-speech-representation-codec-for-speech-language-models-2501.00018</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/secodec-structural-entropy-based-compressive-speech-representation-codec-for-speech-language-models-2501.00018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/secodec-structural-entropy-based-compressive-speech-representation-codec-for-speech-language-models-2501.00018"/></url>
<url><loc>https://scifaro.com/en/abs/lungmix-a-mixup-based-strategy-for-generalization-in-respiratory-sound-classification-2501.00064</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lungmix-a-mixup-based-strategy-for-generalization-in-respiratory-sound-classification-2501.00064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lungmix-a-mixup-based-strategy-for-generalization-in-respiratory-sound-classification-2501.00064"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-of-classifiers-for-speech-evaluation-2501.00067</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-of-classifiers-for-speech-evaluation-2501.00067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-of-classifiers-for-speech-evaluation-2501.00067"/></url>
<url><loc>https://scifaro.com/en/abs/voxvietnam-a-large-scale-multi-genre-dataset-for-vietnamese-speaker-recognition-2501.00328</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxvietnam-a-large-scale-multi-genre-dataset-for-vietnamese-speaker-recognition-2501.00328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxvietnam-a-large-scale-multi-genre-dataset-for-vietnamese-speaker-recognition-2501.00328"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-information-reconstruction-and-non-aligned-residual-in-spiking-neural-networks-for-speech-classification-2501.00348</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-information-reconstruction-and-non-aligned-residual-in-spiking-neural-networks-for-speech-classification-2501.00348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-information-reconstruction-and-non-aligned-residual-in-spiking-neural-networks-for-speech-classification-2501.00348"/></url>
<url><loc>https://scifaro.com/en/abs/tspe-task-specific-prompt-ensemble-for-improved-zero-shot-audio-classification-2501.00398</loc><lastmod>2025-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tspe-task-specific-prompt-ensemble-for-improved-zero-shot-audio-classification-2501.00398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tspe-task-specific-prompt-ensemble-for-improved-zero-shot-audio-classification-2501.00398"/></url>
<url><loc>https://scifaro.com/en/abs/unrolled-creative-adversarial-network-for-generating-novel-musical-pieces-2501.00452</loc><lastmod>2025-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unrolled-creative-adversarial-network-for-generating-novel-musical-pieces-2501.00452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unrolled-creative-adversarial-network-for-generating-novel-musical-pieces-2501.00452"/></url>
<url><loc>https://scifaro.com/en/abs/u-gift-uncertainty-guided-firewall-for-toxic-speech-in-few-shot-scenario-2501.00907</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-gift-uncertainty-guided-firewall-for-toxic-speech-in-few-shot-scenario-2501.00907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-gift-uncertainty-guided-firewall-for-toxic-speech-in-few-shot-scenario-2501.00907"/></url>
<url><loc>https://scifaro.com/en/abs/mmva-multimodal-matching-based-on-valence-and-arousal-across-images-music-and-musical-captions-2501.01094</loc><lastmod>2025-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmva-multimodal-matching-based-on-valence-and-arousal-across-images-music-and-musical-captions-2501.01094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmva-multimodal-matching-based-on-valence-and-arousal-across-images-music-and-musical-captions-2501.01094"/></url>
<url><loc>https://scifaro.com/en/abs/fast-fast-audio-spectrogram-transformer-2501.01104</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-fast-audio-spectrogram-transformer-2501.01104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-fast-audio-spectrogram-transformer-2501.01104"/></url>
<url><loc>https://scifaro.com/en/abs/muq-self-supervised-music-representation-learning-with-mel-residual-vector-quantization-2501.01108</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muq-self-supervised-music-representation-learning-with-mel-residual-vector-quantization-2501.01108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muq-self-supervised-music-representation-learning-with-mel-residual-vector-quantization-2501.01108"/></url>
<url><loc>https://scifaro.com/en/abs/robust-covid-19-detection-from-cough-sounds-using-deep-neural-decision-tree-and-forest-a-comprehensive-cross-datasets-evaluation-2501.01117</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-covid-19-detection-from-cough-sounds-using-deep-neural-decision-tree-and-forest-a-comprehensive-cross-datasets-evaluation-2501.01117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-covid-19-detection-from-cough-sounds-using-deep-neural-decision-tree-and-forest-a-comprehensive-cross-datasets-evaluation-2501.01117"/></url>
<url><loc>https://scifaro.com/en/abs/ringformer-a-neural-vocoder-with-ring-attention-and-convolution-augmented-transformer-2501.01182</loc><lastmod>2025-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ringformer-a-neural-vocoder-with-ring-attention-and-convolution-augmented-transformer-2501.01182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ringformer-a-neural-vocoder-with-ring-attention-and-convolution-augmented-transformer-2501.01182"/></url>
<url><loc>https://scifaro.com/en/abs/adaptvc-high-quality-voice-conversion-with-adaptive-learning-2501.01347</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptvc-high-quality-voice-conversion-with-adaptive-learning-2501.01347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptvc-high-quality-voice-conversion-with-adaptive-learning-2501.01347"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-hierarchical-features-for-anomalous-sound-detection-under-domain-shift-2501.01604</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-hierarchical-features-for-anomalous-sound-detection-under-domain-shift-2501.01604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-hierarchical-features-for-anomalous-sound-detection-under-domain-shift-2501.01604"/></url>
<url><loc>https://scifaro.com/en/abs/a-speech-enhancement-method-using-fast-fourier-transform-and-convolutional-autoencoder-2501.01650</loc><lastmod>2025-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speech-enhancement-method-using-fast-fourier-transform-and-convolutional-autoencoder-2501.01650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speech-enhancement-method-using-fast-fourier-transform-and-convolutional-autoencoder-2501.01650"/></url>
<url><loc>https://scifaro.com/en/abs/improved-feature-extraction-network-for-neuro-oriented-target-speaker-extraction-2501.01673</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-feature-extraction-network-for-neuro-oriented-target-speaker-extraction-2501.01673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-feature-extraction-network-for-neuro-oriented-target-speaker-extraction-2501.01673"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-your-attributes-in-voice-2501.01674</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-your-attributes-in-voice-2501.01674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-your-attributes-in-voice-2501.01674"/></url>
<url><loc>https://scifaro.com/en/abs/musicgen-stem-multi-stem-music-generation-and-edition-through-autoregressive-modeling-2501.01757</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicgen-stem-multi-stem-music-generation-and-edition-through-autoregressive-modeling-2501.01757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicgen-stem-multi-stem-music-generation-and-edition-through-autoregressive-modeling-2501.01757"/></url>
<url><loc>https://scifaro.com/en/abs/cycleflow-leveraging-cycle-consistency-in-flow-matching-for-speaker-style-adaptation-2501.01861</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycleflow-leveraging-cycle-consistency-in-flow-matching-for-speaker-style-adaptation-2501.01861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycleflow-leveraging-cycle-consistency-in-flow-matching-for-speaker-style-adaptation-2501.01861"/></url>
<url><loc>https://scifaro.com/en/abs/structural-and-statistical-audio-texture-knowledge-distillation-for-acoustic-classification-2501.01921</loc><lastmod>2026-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structural-and-statistical-audio-texture-knowledge-distillation-for-acoustic-classification-2501.01921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structural-and-statistical-audio-texture-knowledge-distillation-for-acoustic-classification-2501.01921"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-music-performance-errors-with-transformers-2501.02030</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-music-performance-errors-with-transformers-2501.02030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-music-performance-errors-with-transformers-2501.02030"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-the-gap-between-pretrained-speech-enhancement-and-recognition-models-using-a-real-speech-trained-bridging-module-2501.02452</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-the-gap-between-pretrained-speech-enhancement-and-recognition-models-using-a-real-speech-trained-bridging-module-2501.02452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-the-gap-between-pretrained-speech-enhancement-and-recognition-models-using-a-real-speech-trained-bridging-module-2501.02452"/></url>
<url><loc>https://scifaro.com/en/abs/a-system-for-melodic-harmonization-using-schoenberg-regions-giant-steps-and-church-modes-2501.02642</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-system-for-melodic-harmonization-using-schoenberg-regions-giant-steps-and-church-modes-2501.02642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-system-for-melodic-harmonization-using-schoenberg-regions-giant-steps-and-church-modes-2501.02642"/></url>
<url><loc>https://scifaro.com/en/abs/ccstereo-audio-visual-contextual-and-contrastive-learning-for-binaural-audio-generation-2501.02786</loc><lastmod>2025-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ccstereo-audio-visual-contextual-and-contrastive-learning-for-binaural-audio-generation-2501.02786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ccstereo-audio-visual-contextual-and-contrastive-learning-for-binaural-audio-generation-2501.02786"/></url>
<url><loc>https://scifaro.com/en/abs/towards-hrtf-personalization-using-denoising-diffusion-models-2501.02871</loc><lastmod>2025-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-hrtf-personalization-using-denoising-diffusion-models-2501.02871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-hrtf-personalization-using-denoising-diffusion-models-2501.02871"/></url>
<url><loc>https://scifaro.com/en/abs/syki-svc-advancing-singing-voice-conversion-with-post-processing-innovations-and-an-open-source-professional-testset-2501.02953</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syki-svc-advancing-singing-voice-conversion-with-post-processing-innovations-and-an-open-source-professional-testset-2501.02953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syki-svc-advancing-singing-voice-conversion-with-post-processing-innovations-and-an-open-source-professional-testset-2501.02953"/></url>
<url><loc>https://scifaro.com/en/abs/piano-transcription-by-hierarchical-language-modeling-with-pretrained-roll-based-encoders-2501.03038</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/piano-transcription-by-hierarchical-language-modeling-with-pretrained-roll-based-encoders-2501.03038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/piano-transcription-by-hierarchical-language-modeling-with-pretrained-roll-based-encoders-2501.03038"/></url>
<url><loc>https://scifaro.com/en/abs/facespeak-expressive-and-high-quality-speech-synthesis-from-human-portraits-of-different-styles-2501.03181</loc><lastmod>2025-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/facespeak-expressive-and-high-quality-speech-synthesis-from-human-portraits-of-different-styles-2501.03181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/facespeak-expressive-and-high-quality-speech-synthesis-from-human-portraits-of-different-styles-2501.03181"/></url>
<url><loc>https://scifaro.com/en/abs/lhgnn-local-higher-order-graph-neural-networks-for-audio-classification-and-tagging-2501.03464</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lhgnn-local-higher-order-graph-neural-networks-for-audio-classification-and-tagging-2501.03464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lhgnn-local-higher-order-graph-neural-networks-for-audio-classification-and-tagging-2501.03464"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-tract-length-warped-features-for-spoken-keyword-spotting-2501.03523</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-tract-length-warped-features-for-spoken-keyword-spotting-2501.03523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-tract-length-warped-features-for-spoken-keyword-spotting-2501.03523"/></url>
<url><loc>https://scifaro.com/en/abs/effective-and-efficient-mixed-precision-quantization-of-speech-foundation-models-2501.03643</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-and-efficient-mixed-precision-quantization-of-speech-foundation-models-2501.03643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-and-efficient-mixed-precision-quantization-of-speech-foundation-models-2501.03643"/></url>
<url><loc>https://scifaro.com/en/abs/majl-a-model-agnostic-joint-learning-framework-for-music-source-separation-and-pitch-estimation-2501.03689</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/majl-a-model-agnostic-joint-learning-framework-for-music-source-separation-and-pitch-estimation-2501.03689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/majl-a-model-agnostic-joint-learning-framework-for-music-source-separation-and-pitch-estimation-2501.03689"/></url>
<url><loc>https://scifaro.com/en/abs/guitar-techs-an-electric-guitar-dataset-covering-techniques-musical-excerpts-chords-and-scales-using-a-diverse-array-of-hardware-2501.03720</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guitar-techs-an-electric-guitar-dataset-covering-techniques-musical-excerpts-chords-and-scales-using-a-diverse-array-of-hardware-2501.03720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guitar-techs-an-electric-guitar-dataset-covering-techniques-musical-excerpts-chords-and-scales-using-a-diverse-array-of-hardware-2501.03720"/></url>
<url><loc>https://scifaro.com/en/abs/neuroincept-decoder-for-high-fidelity-speech-reconstruction-from-neural-activity-2501.03757</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuroincept-decoder-for-high-fidelity-speech-reconstruction-from-neural-activity-2501.03757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuroincept-decoder-for-high-fidelity-speech-reconstruction-from-neural-activity-2501.03757"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-the-undetectable-assessing-the-efficacy-of-current-spoof-detection-methods-against-seamless-speech-edits-2501.03805</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-the-undetectable-assessing-the-efficacy-of-current-spoof-detection-methods-against-seamless-speech-edits-2501.03805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-the-undetectable-assessing-the-efficacy-of-current-spoof-detection-methods-against-seamless-speech-edits-2501.03805"/></url>
<url><loc>https://scifaro.com/en/abs/drawspeech-expressive-speech-synthesis-using-prosodic-sketches-as-control-conditions-2501.04256</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drawspeech-expressive-speech-synthesis-using-prosodic-sketches-as-control-conditions-2501.04256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drawspeech-expressive-speech-synthesis-using-prosodic-sketches-as-control-conditions-2501.04256"/></url>
<url><loc>https://scifaro.com/en/abs/maduv-the-1st-interspeech-mice-autism-detection-via-ultrasound-vocalization-challenge-2501.04292</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maduv-the-1st-interspeech-mice-autism-detection-via-ultrasound-vocalization-challenge-2501.04292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maduv-the-1st-interspeech-mice-autism-detection-via-ultrasound-vocalization-challenge-2501.04292"/></url>
<url><loc>https://scifaro.com/en/abs/phone-purity-guided-discrete-tokens-for-dysarthric-speech-recognition-2501.04379</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phone-purity-guided-discrete-tokens-for-dysarthric-speech-recognition-2501.04379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phone-purity-guided-discrete-tokens-for-dysarthric-speech-recognition-2501.04379"/></url>
<url><loc>https://scifaro.com/en/abs/right-label-context-in-end-to-end-training-of-time-synchronous-asr-models-2501.04521</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/right-label-context-in-end-to-end-training-of-time-synchronous-asr-models-2501.04521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/right-label-context-in-end-to-end-training-of-time-synchronous-asr-models-2501.04521"/></url>
<url><loc>https://scifaro.com/en/abs/planing-it-by-ear-convolutional-neural-networks-for-acoustic-anomaly-detection-in-industrial-wood-planers-2501.04819</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/planing-it-by-ear-convolutional-neural-networks-for-acoustic-anomaly-detection-in-industrial-wood-planers-2501.04819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/planing-it-by-ear-convolutional-neural-networks-for-acoustic-anomaly-detection-in-industrial-wood-planers-2501.04819"/></url>
<url><loc>https://scifaro.com/en/abs/signl-a-label-efficient-audio-deepfake-detection-system-via-spectral-temporal-graph-non-contrastive-learning-2501.04942</loc><lastmod>2026-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signl-a-label-efficient-audio-deepfake-detection-system-via-spectral-temporal-graph-non-contrastive-learning-2501.04942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signl-a-label-efficient-audio-deepfake-detection-system-via-spectral-temporal-graph-non-contrastive-learning-2501.04942"/></url>
<url><loc>https://scifaro.com/en/abs/music-tagging-with-classifier-group-chains-2501.05050</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-tagging-with-classifier-group-chains-2501.05050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-tagging-with-classifier-group-chains-2501.05050"/></url>
<url><loc>https://scifaro.com/en/abs/d3rm-a-discrete-denoising-diffusion-refinement-model-for-piano-transcription-2501.05068</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/d3rm-a-discrete-denoising-diffusion-refinement-model-for-piano-transcription-2501.05068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/d3rm-a-discrete-denoising-diffusion-refinement-model-for-piano-transcription-2501.05068"/></url>
<url><loc>https://scifaro.com/en/abs/diffattack-diffusion-based-timbre-reserved-adversarial-attack-in-speaker-identification-2501.05127</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffattack-diffusion-based-timbre-reserved-adversarial-attack-in-speaker-identification-2501.05127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffattack-diffusion-based-timbre-reserved-adversarial-attack-in-speaker-identification-2501.05127"/></url>
<url><loc>https://scifaro.com/en/abs/zipenhancer-dual-path-down-up-sampling-based-zipformer-for-monaural-speech-enhancement-2501.05183</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zipenhancer-dual-path-down-up-sampling-based-zipformer-for-monaural-speech-enhancement-2501.05183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zipenhancer-dual-path-down-up-sampling-based-zipformer-for-monaural-speech-enhancement-2501.05183"/></url>
<url><loc>https://scifaro.com/en/abs/ancogen-analysis-control-and-generation-of-speech-with-a-masked-autoencoder-2501.05332</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ancogen-analysis-control-and-generation-of-speech-with-a-masked-autoencoder-2501.05332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ancogen-analysis-control-and-generation-of-speech-with-a-masked-autoencoder-2501.05332"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-sound-assembling-sounds-from-visuals-for-audio-to-image-generation-2501.05413</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-sound-assembling-sounds-from-visuals-for-audio-to-image-generation-2501.05413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-sound-assembling-sounds-from-visuals-for-audio-to-image-generation-2501.05413"/></url>
<url><loc>https://scifaro.com/en/abs/unmasking-deepfakes-leveraging-augmentations-and-features-variability-for-deepfake-speech-detection-2501.05545</loc><lastmod>2025-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unmasking-deepfakes-leveraging-augmentations-and-features-variability-for-deepfake-speech-detection-2501.05545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unmasking-deepfakes-leveraging-augmentations-and-features-variability-for-deepfake-speech-detection-2501.05545"/></url>
<url><loc>https://scifaro.com/en/abs/freesvc-towards-zero-shot-multilingual-singing-voice-conversion-2501.05586</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/freesvc-towards-zero-shot-multilingual-singing-voice-conversion-2501.05586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/freesvc-towards-zero-shot-multilingual-singing-voice-conversion-2501.05586"/></url>
<url><loc>https://scifaro.com/en/abs/expo-explainable-phonetic-trait-oriented-network-for-speaker-verification-2501.05729</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expo-explainable-phonetic-trait-oriented-network-for-speaker-verification-2501.05729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expo-explainable-phonetic-trait-oriented-network-for-speaker-verification-2501.05729"/></url>
<url><loc>https://scifaro.com/en/abs/cognospeak-an-automatic-remote-assessment-of-early-cognitive-decline-in-real-world-conversational-speech-2501.05755</loc><lastmod>2025-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cognospeak-an-automatic-remote-assessment-of-early-cognitive-decline-in-real-world-conversational-speech-2501.05755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cognospeak-an-automatic-remote-assessment-of-early-cognitive-decline-in-real-world-conversational-speech-2501.05755"/></url>
<url><loc>https://scifaro.com/en/abs/towards-early-prediction-of-self-supervised-speech-model-performance-2501.05966</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-early-prediction-of-self-supervised-speech-model-performance-2501.05966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-early-prediction-of-self-supervised-speech-model-performance-2501.05966"/></url>
<url><loc>https://scifaro.com/en/abs/xlstm-senet-xlstm-for-single-channel-speech-enhancement-2501.06146</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xlstm-senet-xlstm-for-single-channel-speech-enhancement-2501.06146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xlstm-senet-xlstm-for-single-channel-speech-enhancement-2501.06146"/></url>
<url><loc>https://scifaro.com/en/abs/proemo-prompt-driven-text-to-speech-synthesis-based-on-emotion-and-intensity-control-2501.06276</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proemo-prompt-driven-text-to-speech-synthesis-based-on-emotion-and-intensity-control-2501.06276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proemo-prompt-driven-text-to-speech-synthesis-based-on-emotion-and-intensity-control-2501.06276"/></url>
<url><loc>https://scifaro.com/en/abs/unispeaker-a-unified-approach-for-multimodality-driven-speaker-generation-2501.06394</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unispeaker-a-unified-approach-for-multimodality-driven-speaker-generation-2501.06394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unispeaker-a-unified-approach-for-multimodality-driven-speaker-generation-2501.06394"/></url>
<url><loc>https://scifaro.com/en/abs/neural-codec-source-tracing-toward-comprehensive-attribution-in-open-set-condition-2501.06514</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-codec-source-tracing-toward-comprehensive-attribution-in-open-set-condition-2501.06514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-codec-source-tracing-toward-comprehensive-attribution-in-open-set-condition-2501.06514"/></url>
<url><loc>https://scifaro.com/en/abs/sanidha-a-studio-quality-multi-modal-dataset-for-carnatic-music-2501.06959</loc><lastmod>2025-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sanidha-a-studio-quality-multi-modal-dataset-for-carnatic-music-2501.06959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sanidha-a-studio-quality-multi-modal-dataset-for-carnatic-music-2501.06959"/></url>
<url><loc>https://scifaro.com/en/abs/audio-cot-exploring-chain-of-thought-reasoning-in-large-audio-language-model-2501.07246</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-cot-exploring-chain-of-thought-reasoning-in-large-audio-language-model-2501.07246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-cot-exploring-chain-of-thought-reasoning-in-large-audio-language-model-2501.07246"/></url>
<url><loc>https://scifaro.com/en/abs/joint-automatic-speech-recognition-and-structure-learning-for-better-speech-understanding-2501.07329</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-automatic-speech-recognition-and-structure-learning-for-better-speech-understanding-2501.07329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-automatic-speech-recognition-and-structure-learning-for-better-speech-understanding-2501.07329"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-musical-surprisal-in-audio-2501.07474</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-musical-surprisal-in-audio-2501.07474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-musical-surprisal-in-audio-2501.07474"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-musical-evolution-through-network-science-2501.07557</loc><lastmod>2026-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-musical-evolution-through-network-science-2501.07557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-musical-evolution-through-network-science-2501.07557"/></url>
<url><loc>https://scifaro.com/en/abs/bridge-sr-schr-odinger-bridge-for-efficient-sr-2501.07897</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridge-sr-schr-odinger-bridge-for-efficient-sr-2501.07897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridge-sr-schr-odinger-bridge-for-efficient-sr-2501.07897"/></url>
<url><loc>https://scifaro.com/en/abs/codecfake-a-large-scale-neural-audio-codec-based-deepfake-speech-dataset-2501.08238</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codecfake-a-large-scale-neural-audio-codec-based-deepfake-speech-dataset-2501.08238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codecfake-a-large-scale-neural-audio-codec-based-deepfake-speech-dataset-2501.08238"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lightweight-and-stable-zero-shot-tts-with-self-distilled-representation-disentanglement-2501.08566</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lightweight-and-stable-zero-shot-tts-with-self-distilled-representation-disentanglement-2501.08566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lightweight-and-stable-zero-shot-tts-with-self-distilled-representation-disentanglement-2501.08566"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-data-augmentation-with-naturalspeech3-for-far-field-speaker-verification-2501.08691</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-data-augmentation-with-naturalspeech3-for-far-field-speaker-verification-2501.08691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-data-augmentation-with-naturalspeech3-for-far-field-speaker-verification-2501.08691"/></url>
<url><loc>https://scifaro.com/en/abs/xmusic-towards-a-generalized-and-controllable-symbolic-music-generation-framework-2501.08809</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xmusic-towards-a-generalized-and-controllable-symbolic-music-generation-framework-2501.08809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xmusic-towards-a-generalized-and-controllable-symbolic-music-generation-framework-2501.08809"/></url>
<url><loc>https://scifaro.com/en/abs/discrimination-loss-vs-srt-a-model-based-approach-towards-harmonizing-speech-test-interpretations-2501.08921</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrimination-loss-vs-srt-a-model-based-approach-towards-harmonizing-speech-test-interpretations-2501.08921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrimination-loss-vs-srt-a-model-based-approach-towards-harmonizing-speech-test-interpretations-2501.08921"/></url>
<url><loc>https://scifaro.com/en/abs/a-non-autoregressive-model-for-joint-stt-and-tts-2501.09104</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-non-autoregressive-model-for-joint-stt-and-tts-2501.09104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-non-autoregressive-model-for-joint-stt-and-tts-2501.09104"/></url>
<url><loc>https://scifaro.com/en/abs/metric-learning-with-progressive-self-distillation-for-audio-visual-embedding-learning-2501.09608</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-learning-with-progressive-self-distillation-for-audio-visual-embedding-learning-2501.09608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-learning-with-progressive-self-distillation-for-audio-visual-embedding-learning-2501.09608"/></url>
<url><loc>https://scifaro.com/en/abs/gvmgen-a-general-video-to-music-generation-model-with-hierarchical-attentions-2501.09972</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gvmgen-a-general-video-to-music-generation-model-with-hierarchical-attentions-2501.09972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gvmgen-a-general-video-to-music-generation-model-with-hierarchical-attentions-2501.09972"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-sr-a-unified-generative-transformer-convolutional-adversarial-network-for-high-fidelity-speech-super-resolution-2501.10045</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-sr-a-unified-generative-transformer-convolutional-adversarial-network-for-high-fidelity-speech-super-resolution-2501.10045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-sr-a-unified-generative-transformer-convolutional-adversarial-network-for-high-fidelity-speech-super-resolution-2501.10045"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-latent-diffusion-based-speech-enhancement-via-dual-context-learning-2501.10052</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-latent-diffusion-based-speech-enhancement-via-dual-context-learning-2501.10052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-latent-diffusion-based-speech-enhancement-via-dual-context-learning-2501.10052"/></url>
<url><loc>https://scifaro.com/en/abs/ai-generated-music-detection-and-its-challenges-2501.10111</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-generated-music-detection-and-its-challenges-2501.10111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-generated-music-detection-and-its-challenges-2501.10111"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-integrated-approach-for-expressive-piano-performance-synthesis-from-music-scores-2501.10222</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-integrated-approach-for-expressive-piano-performance-synthesis-from-music-scores-2501.10222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-integrated-approach-for-expressive-piano-performance-synthesis-from-music-scores-2501.10222"/></url>
<url><loc>https://scifaro.com/en/abs/dfingernet-noise-adaptive-speech-enhancement-for-hearing-aids-2501.10525</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dfingernet-noise-adaptive-speech-enhancement-for-hearing-aids-2501.10525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dfingernet-noise-adaptive-speech-enhancement-for-hearing-aids-2501.10525"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-detection-based-on-mfcc-and-cnn-lstm-architecture-2501.10666</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-detection-based-on-mfcc-and-cnn-lstm-architecture-2501.10666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-detection-based-on-mfcc-and-cnn-lstm-architecture-2501.10666"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-study-on-joint-modeling-for-sound-event-localization-and-detection-with-source-distance-estimation-2501.10755</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-study-on-joint-modeling-for-sound-event-localization-and-detection-with-source-distance-estimation-2501.10755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-study-on-joint-modeling-for-sound-event-localization-and-detection-with-source-distance-estimation-2501.10755"/></url>
<url><loc>https://scifaro.com/en/abs/musiceval-a-generative-music-dataset-with-expert-ratings-for-automatic-text-to-music-evaluation-2501.10811</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musiceval-a-generative-music-dataset-with-expert-ratings-for-automatic-text-to-music-evaluation-2501.10811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musiceval-a-generative-music-dataset-with-expert-ratings-for-automatic-text-to-music-evaluation-2501.10811"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-neural-spoken-language-recognition-an-exploration-with-multilingual-datasets-2501.11065</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-neural-spoken-language-recognition-an-exploration-with-multilingual-datasets-2501.11065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-neural-spoken-language-recognition-an-exploration-with-multilingual-datasets-2501.11065"/></url>
<url><loc>https://scifaro.com/en/abs/water-flow-detection-device-based-on-sound-data-analysis-and-machine-learning-to-detect-water-leakage-2501.11151</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/water-flow-detection-device-based-on-sound-data-analysis-and-machine-learning-to-detect-water-leakage-2501.11151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/water-flow-detection-device-based-on-sound-data-analysis-and-machine-learning-to-detect-water-leakage-2501.11151"/></url>
<url><loc>https://scifaro.com/en/abs/a2sb-audio-to-audio-schrodinger-bridges-2501.11311</loc><lastmod>2025-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a2sb-audio-to-audio-schrodinger-bridges-2501.11311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a2sb-audio-to-audio-schrodinger-bridges-2501.11311"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-whisper-asr-hallucinations-induced-by-non-speech-audio-2501.11378</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-whisper-asr-hallucinations-induced-by-non-speech-audio-2501.11378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-whisper-asr-hallucinations-induced-by-non-speech-audio-2501.11378"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-estimation-in-the-real-world-a-study-on-music-emotion-recognition-2501.11570</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-estimation-in-the-real-world-a-study-on-music-emotion-recognition-2501.11570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-estimation-in-the-real-world-a-study-on-music-emotion-recognition-2501.11570"/></url>
<url><loc>https://scifaro.com/en/abs/noise-agnostic-multitask-whisper-training-for-reducing-false-alarm-errors-in-call-for-help-detection-2501.11631</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-agnostic-multitask-whisper-training-for-reducing-false-alarm-errors-in-call-for-help-detection-2501.11631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-agnostic-multitask-whisper-training-for-reducing-false-alarm-errors-in-call-for-help-detection-2501.11631"/></url>
<url><loc>https://scifaro.com/en/abs/transferable-adversarial-attacks-on-audio-deepfake-detection-2501.11902</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferable-adversarial-attacks-on-audio-deepfake-detection-2501.11902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferable-adversarial-attacks-on-audio-deepfake-detection-2501.11902"/></url>
<url><loc>https://scifaro.com/en/abs/dota-me-cs-daily-oriented-text-audio-mandarin-english-code-switching-dataset-2501.12122</loc><lastmod>2025-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dota-me-cs-daily-oriented-text-audio-mandarin-english-code-switching-dataset-2501.12122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dota-me-cs-daily-oriented-text-audio-mandarin-english-code-switching-dataset-2501.12122"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-approach-for-korean-wakeword-systems-with-speaker-authentication-2501.12194</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-korean-wakeword-systems-with-speaker-authentication-2501.12194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-korean-wakeword-systems-with-speaker-authentication-2501.12194"/></url>
<url><loc>https://scifaro.com/en/abs/audio-texture-manipulation-by-exemplar-based-analogy-2501.12385</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-texture-manipulation-by-exemplar-based-analogy-2501.12385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-texture-manipulation-by-exemplar-based-analogy-2501.12385"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-losses-for-hierarchical-embedding-learning-2501.12796</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-losses-for-hierarchical-embedding-learning-2501.12796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-losses-for-hierarchical-embedding-learning-2501.12796"/></url>
<url><loc>https://scifaro.com/en/abs/s-key-self-supervised-learning-of-major-and-minor-keys-from-audio-2501.12907</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s-key-self-supervised-learning-of-major-and-minor-keys-from-audio-2501.12907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s-key-self-supervised-learning-of-major-and-minor-keys-from-audio-2501.12907"/></url>
<url><loc>https://scifaro.com/en/abs/osum-advancing-open-speech-understanding-models-with-limited-resources-in-academia-2501.13306</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/osum-advancing-open-speech-understanding-models-with-limited-resources-in-academia-2501.13306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/osum-advancing-open-speech-understanding-models-with-limited-resources-in-academia-2501.13306"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-multi-modality-gaps-of-audio-visual-and-linguistic-for-speech-enhancement-2501.13375</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-multi-modality-gaps-of-audio-visual-and-linguistic-for-speech-enhancement-2501.13375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-multi-modality-gaps-of-audio-visual-and-linguistic-for-speech-enhancement-2501.13375"/></url>
<url><loc>https://scifaro.com/en/abs/neural-vocoders-as-speech-enhancers-2501.13465</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-vocoders-as-speech-enhancers-2501.13465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-vocoders-as-speech-enhancers-2501.13465"/></url>
<url><loc>https://scifaro.com/en/abs/dq-data2vec-decoupling-quantization-for-multilingual-speech-recognition-2501.13497</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dq-data2vec-decoupling-quantization-for-multilingual-speech-recognition-2501.13497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dq-data2vec-decoupling-quantization-for-multilingual-speech-recognition-2501.13497"/></url>
<url><loc>https://scifaro.com/en/abs/jailbreak-audiobench-in-depth-evaluation-and-analysis-of-jailbreak-threats-for-large-audio-language-models-2501.13772</loc><lastmod>2026-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jailbreak-audiobench-in-depth-evaluation-and-analysis-of-jailbreak-threats-for-large-audio-language-models-2501.13772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jailbreak-audiobench-in-depth-evaluation-and-analysis-of-jailbreak-threats-for-large-audio-language-models-2501.13772"/></url>
<url><loc>https://scifaro.com/en/abs/everyone-can-sing-zero-shot-singing-voice-synthesis-and-conversion-with-speech-reference-2501.13870</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/everyone-can-sing-zero-shot-singing-voice-synthesis-and-conversion-with-speech-reference-2501.13870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/everyone-can-sing-zero-shot-singing-voice-synthesis-and-conversion-with-speech-reference-2501.13870"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-spatial-cues-from-cochlear-implant-microphones-to-efficiently-enhance-speech-separation-in-real-world-listening-scenes-2501.14610</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-spatial-cues-from-cochlear-implant-microphones-to-efficiently-enhance-speech-separation-in-real-world-listening-scenes-2501.14610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-spatial-cues-from-cochlear-implant-microphones-to-efficiently-enhance-speech-separation-in-real-world-listening-scenes-2501.14610"/></url>
<url><loc>https://scifaro.com/en/abs/methods-to-increase-the-amount-of-data-for-speech-recognition-for-low-resource-languages-2501.14788</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/methods-to-increase-the-amount-of-data-for-speech-recognition-for-low-resource-languages-2501.14788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/methods-to-increase-the-amount-of-data-for-speech-recognition-for-low-resource-languages-2501.14788"/></url>
<url><loc>https://scifaro.com/en/abs/robust-cross-etiology-and-speaker-independent-dysarthric-speech-recognition-2501.14994</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-cross-etiology-and-speaker-independent-dysarthric-speech-recognition-2501.14994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-cross-etiology-and-speaker-independent-dysarthric-speech-recognition-2501.14994"/></url>
<url><loc>https://scifaro.com/en/abs/superear-eavesdropping-on-mobile-voice-calls-via-stealthy-acoustic-metamaterials-2501.15032</loc><lastmod>2026-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/superear-eavesdropping-on-mobile-voice-calls-via-stealthy-acoustic-metamaterials-2501.15032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/superear-eavesdropping-on-mobile-voice-calls-via-stealthy-acoustic-metamaterials-2501.15032"/></url>
<url><loc>https://scifaro.com/en/abs/audio-language-models-for-audio-centric-tasks-a-systematic-survey-2501.15177</loc><lastmod>2026-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-language-models-for-audio-centric-tasks-a-systematic-survey-2501.15177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-language-models-for-audio-centric-tasks-a-systematic-survey-2501.15177"/></url>
<url><loc>https://scifaro.com/en/abs/the-icme-2025-audio-encoder-capability-challenge-2501.15302</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-icme-2025-audio-encoder-capability-challenge-2501.15302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-icme-2025-audio-encoder-capability-challenge-2501.15302"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-using-human-in-the-loop-reinforcement-learning-2501.15304</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-using-human-in-the-loop-reinforcement-learning-2501.15304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-using-human-in-the-loop-reinforcement-learning-2501.15304"/></url>
<url><loc>https://scifaro.com/en/abs/anyenhance-a-unified-generative-model-with-prompt-guidance-and-self-critic-for-voice-enhancement-2501.15417</loc><lastmod>2025-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anyenhance-a-unified-generative-model-with-prompt-guidance-and-self-critic-for-voice-enhancement-2501.15417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anyenhance-a-unified-generative-model-with-prompt-guidance-and-self-critic-for-voice-enhancement-2501.15417"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-the-amphion-toolkit-v0-2-2501.15442</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-the-amphion-toolkit-v0-2-2501.15442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-the-amphion-toolkit-v0-2-2501.15442"/></url>
<url><loc>https://scifaro.com/en/abs/stepback-enhanced-disentanglement-for-voice-conversion-via-multi-task-learning-2501.15613</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stepback-enhanced-disentanglement-for-voice-conversion-via-multi-task-learning-2501.15613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stepback-enhanced-disentanglement-for-voice-conversion-via-multi-task-learning-2501.15613"/></url>
<url><loc>https://scifaro.com/en/abs/emilia-a-large-scale-extensive-multilingual-and-diverse-dataset-for-speech-generation-2501.15907</loc><lastmod>2025-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emilia-a-large-scale-extensive-multilingual-and-diverse-dataset-for-speech-generation-2501.15907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emilia-a-large-scale-extensive-multilingual-and-diverse-dataset-for-speech-generation-2501.15907"/></url>
<url><loc>https://scifaro.com/en/abs/optimized-self-supervised-training-with-best-rq-for-speech-recognition-2501.16131</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimized-self-supervised-training-with-best-rq-for-speech-recognition-2501.16131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimized-self-supervised-training-with-best-rq-for-speech-recognition-2501.16131"/></url>
<url><loc>https://scifaro.com/en/abs/ave-speech-a-comprehensive-multi-modal-dataset-for-speech-recognition-integrating-audio-visual-and-electromyographic-signals-2501.16780</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ave-speech-a-comprehensive-multi-modal-dataset-for-speech-recognition-integrating-audio-visual-and-electromyographic-signals-2501.16780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ave-speech-a-comprehensive-multi-modal-dataset-for-speech-recognition-integrating-audio-visual-and-electromyographic-signals-2501.16780"/></url>
<url><loc>https://scifaro.com/en/abs/midi-gpt-a-controllable-generative-model-for-computer-assisted-multitrack-music-composition-2501.17011</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-gpt-a-controllable-generative-model-for-computer-assisted-multitrack-music-composition-2501.17011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-gpt-a-controllable-generative-model-for-computer-assisted-multitrack-music-composition-2501.17011"/></url>
<url><loc>https://scifaro.com/en/abs/6ksfx-synth-dataset-2501.17198</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/6ksfx-synth-dataset-2501.17198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/6ksfx-synth-dataset-2501.17198"/></url>
<url><loc>https://scifaro.com/en/abs/audio-large-language-models-can-be-descriptive-speech-quality-evaluators-2501.17202</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-large-language-models-can-be-descriptive-speech-quality-evaluators-2501.17202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-large-language-models-can-be-descriptive-speech-quality-evaluators-2501.17202"/></url>
<url><loc>https://scifaro.com/en/abs/summary-of-the-notsofar-1-challenge-highlights-and-learnings-2501.17304</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/summary-of-the-notsofar-1-challenge-highlights-and-learnings-2501.17304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/summary-of-the-notsofar-1-challenge-highlights-and-learnings-2501.17304"/></url>
<url><loc>https://scifaro.com/en/abs/compact-neural-tts-voices-for-accessibility-2501.17332</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compact-neural-tts-voices-for-accessibility-2501.17332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compact-neural-tts-voices-for-accessibility-2501.17332"/></url>
<url><loc>https://scifaro.com/en/abs/music2latent2-audio-compression-with-summary-embeddings-and-autoregressive-decoding-2501.17578</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music2latent2-audio-compression-with-summary-embeddings-and-autoregressive-decoding-2501.17578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music2latent2-audio-compression-with-summary-embeddings-and-autoregressive-decoding-2501.17578"/></url>
<url><loc>https://scifaro.com/en/abs/voiceprompter-robust-zero-shot-voice-conversion-with-voice-prompt-and-conditional-flow-matching-2501.17612</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceprompter-robust-zero-shot-voice-conversion-with-voice-prompt-and-conditional-flow-matching-2501.17612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceprompter-robust-zero-shot-voice-conversion-with-voice-prompt-and-conditional-flow-matching-2501.17612"/></url>
<url><loc>https://scifaro.com/en/abs/yin-yang-developing-motifs-with-long-term-structure-and-controllability-2501.17759</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yin-yang-developing-motifs-with-long-term-structure-and-controllability-2501.17759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yin-yang-developing-motifs-with-long-term-structure-and-controllability-2501.17759"/></url>
<url><loc>https://scifaro.com/en/abs/acoupi-an-open-source-python-framework-for-deploying-bioacoustic-ai-models-on-edge-devices-2501.17841</loc><lastmod>2026-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoupi-an-open-source-python-framework-for-deploying-bioacoustic-ai-models-on-edge-devices-2501.17841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoupi-an-open-source-python-framework-for-deploying-bioacoustic-ai-models-on-edge-devices-2501.17841"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-audiovisual-speech-processing-via-mutud-multimodal-training-and-unimodal-deployment-2501.18157</loc><lastmod>2025-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-audiovisual-speech-processing-via-mutud-multimodal-training-and-unimodal-deployment-2501.18157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-audiovisual-speech-processing-via-mutud-multimodal-training-and-unimodal-deployment-2501.18157"/></url>
<url><loc>https://scifaro.com/en/abs/deepfake-detection-of-singing-voices-with-whisper-encodings-2501.18919</loc><lastmod>2025-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfake-detection-of-singing-voices-with-whisper-encodings-2501.18919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfake-detection-of-singing-voices-with-whisper-encodings-2501.18919"/></url>
<url><loc>https://scifaro.com/en/abs/selma-a-speech-enabled-language-model-for-virtual-assistant-interactions-2501.19377</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selma-a-speech-enabled-language-model-for-virtual-assistant-interactions-2501.19377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selma-a-speech-enabled-language-model-for-virtual-assistant-interactions-2501.19377"/></url>
<url><loc>https://scifaro.com/en/abs/evolving-performance-practices-in-beethoven-s-cello-sonatas-tempo-portamento-and-historical-interpretation-of-the-first-movements-2502.00030</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolving-performance-practices-in-beethoven-s-cello-sonatas-tempo-portamento-and-historical-interpretation-of-the-first-movements-2502.00030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolving-performance-practices-in-beethoven-s-cello-sonatas-tempo-portamento-and-historical-interpretation-of-the-first-movements-2502.00030"/></url>
<url><loc>https://scifaro.com/en/abs/sigwavnet-learning-multiresolution-signal-wavelet-network-for-speech-emotion-recognition-2502.00310</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sigwavnet-learning-multiresolution-signal-wavelet-network-for-speech-emotion-recognition-2502.00310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sigwavnet-learning-multiresolution-signal-wavelet-network-for-speech-emotion-recognition-2502.00310"/></url>
<url><loc>https://scifaro.com/en/abs/do-audio-visual-segmentation-models-truly-segment-sounding-objects-2502.00358</loc><lastmod>2025-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-audio-visual-segmentation-models-truly-segment-sounding-objects-2502.00358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-audio-visual-segmentation-models-truly-segment-sounding-objects-2502.00358"/></url>
<url><loc>https://scifaro.com/en/abs/audiogenx-explainability-on-text-to-audio-generative-models-2502.00459</loc><lastmod>2025-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiogenx-explainability-on-text-to-audio-generative-models-2502.00459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiogenx-explainability-on-text-to-audio-generative-models-2502.00459"/></url>
<url><loc>https://scifaro.com/en/abs/cycleguardian-a-framework-for-automatic-respiratorysound-classification-based-on-improved-deep-clustering-and-contrastive-learning-2502.00734</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycleguardian-a-framework-for-automatic-respiratorysound-classification-based-on-improved-deep-clustering-and-contrastive-learning-2502.00734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycleguardian-a-framework-for-automatic-respiratorysound-classification-based-on-improved-deep-clustering-and-contrastive-learning-2502.00734"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-face-to-speech-2502.01046</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-face-to-speech-2502.01046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-face-to-speech-2502.01046"/></url>
<url><loc>https://scifaro.com/en/abs/gradient-norm-based-fine-tuning-for-backdoor-defense-in-automatic-speech-recognition-2502.01152</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gradient-norm-based-fine-tuning-for-backdoor-defense-in-automatic-speech-recognition-2502.01152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gradient-norm-based-fine-tuning-for-backdoor-defense-in-automatic-speech-recognition-2502.01152"/></url>
<url><loc>https://scifaro.com/en/abs/deep-active-speech-cancellation-with-mamba-masking-network-2502.01185</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-active-speech-cancellation-with-mamba-masking-network-2502.01185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-active-speech-cancellation-with-mamba-masking-network-2502.01185"/></url>
<url><loc>https://scifaro.com/en/abs/adapter-based-multi-agent-avsr-extension-for-pre-trained-asr-models-2502.01709</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapter-based-multi-agent-avsr-extension-for-pre-trained-asr-models-2502.01709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapter-based-multi-agent-avsr-extension-for-pre-trained-asr-models-2502.01709"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-perceptual-music-similarity-focusing-on-each-instrumental-part-2502.02138</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-perceptual-music-similarity-focusing-on-each-instrumental-part-2502.02138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-perceptual-music-similarity-focusing-on-each-instrumental-part-2502.02138"/></url>
<url><loc>https://scifaro.com/en/abs/pruning-aware-loss-functions-for-stoi-optimized-pruned-recurrent-autoencoders-for-the-compression-of-the-stimulation-patterns-of-cochlear-implants-at-zero-delay-2502.02424</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pruning-aware-loss-functions-for-stoi-optimized-pruned-recurrent-autoencoders-for-the-compression-of-the-stimulation-patterns-of-cochlear-implants-at-zero-delay-2502.02424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pruning-aware-loss-functions-for-stoi-optimized-pruned-recurrent-autoencoders-for-the-compression-of-the-stimulation-patterns-of-cochlear-implants-at-zero-delay-2502.02424"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-speaker-change-detection-and-gender-classification-for-transducer-based-multi-talker-speech-translation-2502.02683</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-speaker-change-detection-and-gender-classification-for-transducer-based-multi-talker-speech-translation-2502.02683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-speaker-change-detection-and-gender-classification-for-transducer-based-multi-talker-speech-translation-2502.02683"/></url>
<url><loc>https://scifaro.com/en/abs/metis-a-foundation-speech-generation-model-with-masked-generative-pre-training-2502.03128</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metis-a-foundation-speech-generation-model-with-masked-generative-pre-training-2502.03128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metis-a-foundation-speech-generation-model-with-masked-generative-pre-training-2502.03128"/></url>
<url><loc>https://scifaro.com/en/abs/towards-unified-music-emotion-recognition-across-dimensional-and-categorical-models-2502.03979</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-unified-music-emotion-recognition-across-dimensional-and-categorical-models-2502.03979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-unified-music-emotion-recognition-across-dimensional-and-categorical-models-2502.03979"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-two-microphone-method-for-in-situ-sound-absorption-measurements-2502.04143</loc><lastmod>2025-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-two-microphone-method-for-in-situ-sound-absorption-measurements-2502.04143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-two-microphone-method-for-in-situ-sound-absorption-measurements-2502.04143"/></url>
<url><loc>https://scifaro.com/en/abs/xattnmark-learning-robust-audio-watermarking-with-cross-attention-2502.04230</loc><lastmod>2026-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xattnmark-learning-robust-audio-watermarking-with-cross-attention-2502.04230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xattnmark-learning-robust-audio-watermarking-with-cross-attention-2502.04230"/></url>
<url><loc>https://scifaro.com/en/abs/adiff-explaining-audio-difference-using-natural-language-2502.04476</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adiff-explaining-audio-difference-using-natural-language-2502.04476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adiff-explaining-audio-difference-using-natural-language-2502.04476"/></url>
<url><loc>https://scifaro.com/en/abs/improvnet-generating-controllable-musical-improvisations-with-iterative-corruption-refinement-2502.04522</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvnet-generating-controllable-musical-improvisations-with-iterative-corruption-refinement-2502.04522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvnet-generating-controllable-musical-improvisations-with-iterative-corruption-refinement-2502.04522"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-frequency-adaptive-knowledge-distillation-for-speech-enhancement-2502.04711</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-frequency-adaptive-knowledge-distillation-for-speech-enhancement-2502.04711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-frequency-adaptive-knowledge-distillation-for-speech-enhancement-2502.04711"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-conversion-with-accompaniment-using-self-supervised-representation-based-melody-features-2502.04722</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-conversion-with-accompaniment-using-self-supervised-representation-based-melody-features-2502.04722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-conversion-with-accompaniment-using-self-supervised-representation-based-melody-features-2502.04722"/></url>
<url><loc>https://scifaro.com/en/abs/latent-swap-joint-diffusion-for-2d-long-form-latent-generation-2502.05130</loc><lastmod>2025-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-swap-joint-diffusion-for-2d-long-form-latent-generation-2502.05130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-swap-joint-diffusion-for-2d-long-form-latent-generation-2502.05130"/></url>
<url><loc>https://scifaro.com/en/abs/meta-audiobox-aesthetics-unified-automatic-quality-assessment-for-speech-music-and-sound-2502.05139</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-audiobox-aesthetics-unified-automatic-quality-assessment-for-speech-music-and-sound-2502.05139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-audiobox-aesthetics-unified-automatic-quality-assessment-for-speech-music-and-sound-2502.05139"/></url>
<url><loc>https://scifaro.com/en/abs/aligner-encoders-self-attention-transformers-can-be-self-transducers-2502.05232</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aligner-encoders-self-attention-transformers-can-be-self-transducers-2502.05232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aligner-encoders-self-attention-transformers-can-be-self-transducers-2502.05232"/></url>
<url><loc>https://scifaro.com/en/abs/koel-tts-enhancing-llm-based-speech-generation-with-preference-alignment-and-classifier-free-guidance-2502.05236</loc><lastmod>2025-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/koel-tts-enhancing-llm-based-speech-generation-with-preference-alignment-and-classifier-free-guidance-2502.05236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/koel-tts-enhancing-llm-based-speech-generation-with-preference-alignment-and-classifier-free-guidance-2502.05236"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-expressive-voice-conversion-with-discrete-pitch-conditioned-flow-matching-model-2502.05471</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-expressive-voice-conversion-with-discrete-pitch-conditioned-flow-matching-model-2502.05471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-expressive-voice-conversion-with-discrete-pitch-conditioned-flow-matching-model-2502.05471"/></url>
<url><loc>https://scifaro.com/en/abs/indextts-an-industrial-level-controllable-and-efficient-zero-shot-text-to-speech-system-2502.05512</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/indextts-an-industrial-level-controllable-and-efficient-zero-shot-text-to-speech-system-2502.05512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/indextts-an-industrial-level-controllable-and-efficient-zero-shot-text-to-speech-system-2502.05512"/></url>
<url><loc>https://scifaro.com/en/abs/large-language-model-based-nonnegative-matrix-factorization-for-cardiorespiratory-sound-separation-2502.05757</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-language-model-based-nonnegative-matrix-factorization-for-cardiorespiratory-sound-separation-2502.05757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-language-model-based-nonnegative-matrix-factorization-for-cardiorespiratory-sound-separation-2502.05757"/></url>
<url><loc>https://scifaro.com/en/abs/an-adaptive-filter-bank-based-neural-network-approach-for-time-delay-estimation-and-speech-enhancement-2502.06098</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adaptive-filter-bank-based-neural-network-approach-for-time-delay-estimation-and-speech-enhancement-2502.06098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adaptive-filter-bank-based-neural-network-approach-for-time-delay-estimation-and-speech-enhancement-2502.06098"/></url>
<url><loc>https://scifaro.com/en/abs/calibration-of-multiple-asynchronous-microphone-arrays-using-hybrid-tdoa-2502.06195</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calibration-of-multiple-asynchronous-microphone-arrays-using-hybrid-tdoa-2502.06195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calibration-of-multiple-asynchronous-microphone-arrays-using-hybrid-tdoa-2502.06195"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-microphone-speaker-extraction-using-relative-transfer-functions-2502.06285</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-microphone-speaker-extraction-using-relative-transfer-functions-2502.06285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-microphone-speaker-extraction-using-relative-transfer-functions-2502.06285"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-identification-of-samples-in-hip-hop-music-via-multi-loss-training-and-an-artificial-dataset-2502.06364</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-identification-of-samples-in-hip-hop-music-via-multi-loss-training-and-an-artificial-dataset-2502.06364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-identification-of-samples-in-hip-hop-music-via-multi-loss-training-and-an-artificial-dataset-2502.06364"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-deep-audio-representations-for-hearables-2502.06664</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-deep-audio-representations-for-hearables-2502.06664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-deep-audio-representations-for-hearables-2502.06664"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-audio-helps-for-cognitive-state-tasks-2502.06922</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-audio-helps-for-cognitive-state-tasks-2502.06922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-audio-helps-for-cognitive-state-tasks-2502.06922"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-central-frequencies-locally-competitive-algorithm-for-speech-2502.06989</loc><lastmod>2025-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-central-frequencies-locally-competitive-algorithm-for-speech-2502.06989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-central-frequencies-locally-competitive-algorithm-for-speech-2502.06989"/></url>
<url><loc>https://scifaro.com/en/abs/vevo-controllable-zero-shot-voice-imitation-with-self-supervised-disentanglement-2502.07243</loc><lastmod>2025-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vevo-controllable-zero-shot-voice-imitation-with-self-supervised-disentanglement-2502.07243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vevo-controllable-zero-shot-voice-imitation-with-self-supervised-disentanglement-2502.07243"/></url>
<url><loc>https://scifaro.com/en/abs/music-for-all-representational-bias-and-cross-cultural-adaptability-of-music-generation-models-2502.07328</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-for-all-representational-bias-and-cross-cultural-adaptability-of-music-generation-models-2502.07328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-for-all-representational-bias-and-cross-cultural-adaptability-of-music-generation-models-2502.07328"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-zero-shot-text-to-speech-for-background-removal-and-preservation-with-controllable-masked-speech-prediction-2502.07345</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-zero-shot-text-to-speech-for-background-removal-and-preservation-with-controllable-masked-speech-prediction-2502.07345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-zero-shot-text-to-speech-for-background-removal-and-preservation-with-controllable-masked-speech-prediction-2502.07345"/></url>
<url><loc>https://scifaro.com/en/abs/jamendomaxcaps-a-large-scale-music-caption-dataset-with-imputed-metadata-2502.07461</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jamendomaxcaps-a-large-scale-music-caption-dataset-with-imputed-metadata-2502.07461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jamendomaxcaps-a-large-scale-music-caption-dataset-with-imputed-metadata-2502.07461"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-and-transposition-constraints-arising-from-the-use-of-the-roland-tr-808-bass-drum-2502.07524</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-and-transposition-constraints-arising-from-the-use-of-the-roland-tr-808-bass-drum-2502.07524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-and-transposition-constraints-arising-from-the-use-of-the-roland-tr-808-bass-drum-2502.07524"/></url>
<url><loc>https://scifaro.com/en/abs/lorp-tts-low-rank-personalized-text-to-speech-2502.07562</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lorp-tts-low-rank-personalized-text-to-speech-2502.07562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lorp-tts-low-rank-personalized-text-to-speech-2502.07562"/></url>
<url><loc>https://scifaro.com/en/abs/hookpad-aria-a-copilot-for-songwriters-2502.08122</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hookpad-aria-a-copilot-for-songwriters-2502.08122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hookpad-aria-a-copilot-for-songwriters-2502.08122"/></url>
<url><loc>https://scifaro.com/en/abs/methods-for-pitch-analysis-in-contemporary-popular-music-highlighting-pitch-uncertainty-in-primaal-s-commercial-works-2502.08131</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/methods-for-pitch-analysis-in-contemporary-popular-music-highlighting-pitch-uncertainty-in-primaal-s-commercial-works-2502.08131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/methods-for-pitch-analysis-in-contemporary-popular-music-highlighting-pitch-uncertainty-in-primaal-s-commercial-works-2502.08131"/></url>
<url><loc>https://scifaro.com/en/abs/dualstream-contextual-fusion-network-efficient-target-speaker-extraction-by-leveraging-mixture-and-enrollment-interactions-2502.08191</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualstream-contextual-fusion-network-efficient-target-speaker-extraction-by-leveraging-mixture-and-enrollment-interactions-2502.08191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualstream-contextual-fusion-network-efficient-target-speaker-extraction-by-leveraging-mixture-and-enrollment-interactions-2502.08191"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-lstm-by-attention-mechanism-for-early-detection-of-parkinson-s-disease-through-voice-signals-2502.08672</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-lstm-by-attention-mechanism-for-early-detection-of-parkinson-s-disease-through-voice-signals-2502.08672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-lstm-by-attention-mechanism-for-early-detection-of-parkinson-s-disease-through-voice-signals-2502.08672"/></url>
<url><loc>https://scifaro.com/en/abs/tokensynth-a-token-based-neural-synthesizer-for-instrument-cloning-and-text-to-instrument-2502.08939</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tokensynth-a-token-based-neural-synthesizer-for-instrument-cloning-and-text-to-instrument-2502.08939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tokensynth-a-token-based-neural-synthesizer-for-instrument-cloning-and-text-to-instrument-2502.08939"/></url>
<url><loc>https://scifaro.com/en/abs/balancing-physical-modeling-and-musical-requirements-algorithmically-simulating-the-calls-of-hyalessa-maculaticollis-for-real-time-instrumental-control-2502.09459</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/balancing-physical-modeling-and-musical-requirements-algorithmically-simulating-the-calls-of-hyalessa-maculaticollis-for-real-time-instrumental-control-2502.09459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/balancing-physical-modeling-and-musical-requirements-algorithmically-simulating-the-calls-of-hyalessa-maculaticollis-for-real-time-instrumental-control-2502.09459"/></url>
<url><loc>https://scifaro.com/en/abs/autoprosody-a-prosodic-feature-extraction-tool-for-indian-languages-2502.09661</loc><lastmod>2026-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoprosody-a-prosodic-feature-extraction-tool-for-indian-languages-2502.09661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoprosody-a-prosodic-feature-extraction-tool-for-indian-languages-2502.09661"/></url>
<url><loc>https://scifaro.com/en/abs/intergridnet-an-electric-network-frequency-approach-for-audio-source-location-classification-using-convolutional-neural-networks-2502.10011</loc><lastmod>2025-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intergridnet-an-electric-network-frequency-approach-for-audio-source-location-classification-using-convolutional-neural-networks-2502.10011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intergridnet-an-electric-network-frequency-approach-for-audio-source-location-classification-using-convolutional-neural-networks-2502.10011"/></url>
<url><loc>https://scifaro.com/en/abs/video-soundtrack-generation-by-aligning-emotions-and-temporal-boundaries-2502.10154</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video-soundtrack-generation-by-aligning-emotions-and-temporal-boundaries-2502.10154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video-soundtrack-generation-by-aligning-emotions-and-temporal-boundaries-2502.10154"/></url>
<url><loc>https://scifaro.com/en/abs/vocalcrypt-novel-active-defense-against-deepfake-voice-based-on-masking-effect-2502.10329</loc><lastmod>2025-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocalcrypt-novel-active-defense-against-deepfake-voice-based-on-masking-effect-2502.10329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocalcrypt-novel-active-defense-against-deepfake-voice-based-on-masking-effect-2502.10329"/></url>
<url><loc>https://scifaro.com/en/abs/clamp-3-universal-music-information-retrieval-across-unaligned-modalities-and-unseen-languages-2502.10362</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clamp-3-universal-music-information-retrieval-across-unaligned-modalities-and-unseen-languages-2502.10362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clamp-3-universal-music-information-retrieval-across-unaligned-modalities-and-unseen-languages-2502.10362"/></url>
<url><loc>https://scifaro.com/en/abs/ynote-a-novel-music-notation-for-fine-tuning-llms-in-music-generation-2502.10467</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ynote-a-novel-music-notation-for-fine-tuning-llms-in-music-generation-2502.10467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ynote-a-novel-music-notation-for-fine-tuning-llms-in-music-generation-2502.10467"/></url>
<url><loc>https://scifaro.com/en/abs/f-stripe-fast-structure-informed-positional-encoding-for-symbolic-music-generation-2502.10491</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f-stripe-fast-structure-informed-positional-encoding-for-symbolic-music-generation-2502.10491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f-stripe-fast-structure-informed-positional-encoding-for-symbolic-music-generation-2502.10491"/></url>
<url><loc>https://scifaro.com/en/abs/hyperdimensional-intelligent-sensing-for-efficient-real-time-audio-processing-on-extreme-edge-2502.10718</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperdimensional-intelligent-sensing-for-efficient-real-time-audio-processing-on-extreme-edge-2502.10718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperdimensional-intelligent-sensing-for-efficient-real-time-audio-processing-on-extreme-edge-2502.10718"/></url>
<url><loc>https://scifaro.com/en/abs/syncspeech-efficient-and-low-latency-text-to-speech-based-on-temporal-masked-transformer-2502.11094</loc><lastmod>2026-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syncspeech-efficient-and-low-latency-text-to-speech-based-on-temporal-masked-transformer-2502.11094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syncspeech-efficient-and-low-latency-text-to-speech-based-on-temporal-masked-transformer-2502.11094"/></url>
<url><loc>https://scifaro.com/en/abs/throat-and-acoustic-paired-speech-dataset-for-deep-learning-based-speech-enhancement-2502.11478</loc><lastmod>2026-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/throat-and-acoustic-paired-speech-dataset-for-deep-learning-based-speech-enhancement-2502.11478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/throat-and-acoustic-paired-speech-dataset-for-deep-learning-based-speech-enhancement-2502.11478"/></url>
<url><loc>https://scifaro.com/en/abs/nablafx-a-framework-for-differentiable-black-box-and-gray-box-modeling-of-audio-effects-2502.11668</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nablafx-a-framework-for-differentiable-black-box-and-gray-box-modeling-of-audio-effects-2502.11668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nablafx-a-framework-for-differentiable-black-box-and-gray-box-modeling-of-audio-effects-2502.11668"/></url>
<url><loc>https://scifaro.com/en/abs/chordformer-a-conformer-based-architecture-for-large-vocabulary-audio-chord-recognition-2502.11840</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chordformer-a-conformer-based-architecture-for-large-vocabulary-audio-chord-recognition-2502.11840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chordformer-a-conformer-based-architecture-for-large-vocabulary-audio-chord-recognition-2502.11840"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-audio-visual-adversarial-vulnerability-from-temporal-and-modality-perspectives-2502.11858</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-audio-visual-adversarial-vulnerability-from-temporal-and-modality-perspectives-2502.11858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-audio-visual-adversarial-vulnerability-from-temporal-and-modality-perspectives-2502.11858"/></url>
<url><loc>https://scifaro.com/en/abs/naturall2s-end-to-end-high-quality-multispeaker-lip-to-speech-synthesis-with-differential-digital-signal-processing-2502.12002</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturall2s-end-to-end-high-quality-multispeaker-lip-to-speech-synthesis-with-differential-digital-signal-processing-2502.12002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturall2s-end-to-end-high-quality-multispeaker-lip-to-speech-synthesis-with-differential-digital-signal-processing-2502.12002"/></url>
<url><loc>https://scifaro.com/en/abs/masked-latent-prediction-and-classification-for-self-supervised-audio-representation-learning-2502.12031</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-latent-prediction-and-classification-for-self-supervised-audio-representation-learning-2502.12031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-latent-prediction-and-classification-for-self-supervised-audio-representation-learning-2502.12031"/></url>
<url><loc>https://scifaro.com/en/abs/note-level-singing-melody-transcription-for-time-aligned-musical-score-generation-2502.12438</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/note-level-singing-melody-transcription-for-time-aligned-musical-score-generation-2502.12438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/note-level-singing-melody-transcription-for-time-aligned-musical-score-generation-2502.12438"/></url>
<url><loc>https://scifaro.com/en/abs/myna-masking-based-contrastive-learning-of-musical-representations-2502.12511</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/myna-masking-based-contrastive-learning-of-musical-representations-2502.12511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/myna-masking-based-contrastive-learning-of-musical-representations-2502.12511"/></url>
<url><loc>https://scifaro.com/en/abs/techsinger-technique-controllable-multilingual-singing-voice-synthesis-via-flow-matching-2502.12572</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/techsinger-technique-controllable-multilingual-singing-voice-synthesis-via-flow-matching-2502.12572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/techsinger-technique-controllable-multilingual-singing-voice-synthesis-via-flow-matching-2502.12572"/></url>
<url><loc>https://scifaro.com/en/abs/deepresonance-enhancing-multimodal-music-understanding-via-music-centric-multi-way-instruction-tuning-2502.12623</loc><lastmod>2025-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepresonance-enhancing-multimodal-music-understanding-via-music-centric-multi-way-instruction-tuning-2502.12623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepresonance-enhancing-multimodal-music-understanding-via-music-centric-multi-way-instruction-tuning-2502.12623"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-music-vocoder-using-neural-audio-codecs-2502.12759</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-music-vocoder-using-neural-audio-codecs-2502.12759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-music-vocoder-using-neural-audio-codecs-2502.12759"/></url>
<url><loc>https://scifaro.com/en/abs/keep-what-you-need-extracting-efficient-subnetworks-from-large-audio-representation-models-2502.12925</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keep-what-you-need-extracting-efficient-subnetworks-from-large-audio-representation-models-2502.12925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keep-what-you-need-extracting-efficient-subnetworks-from-large-audio-representation-models-2502.12925"/></url>
<url><loc>https://scifaro.com/en/abs/skip-that-beat-augmenting-meter-tracking-models-for-underrepresented-time-signatures-2502.12972</loc><lastmod>2025-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skip-that-beat-augmenting-meter-tracking-models-for-underrepresented-time-signatures-2502.12972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skip-that-beat-augmenting-meter-tracking-models-for-underrepresented-time-signatures-2502.12972"/></url>
<url><loc>https://scifaro.com/en/abs/a-dual-stage-time-context-network-for-speech-based-alzheimer-s-disease-detection-2502.13064</loc><lastmod>2026-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dual-stage-time-context-network-for-speech-based-alzheimer-s-disease-detection-2502.13064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dual-stage-time-context-network-for-speech-based-alzheimer-s-disease-detection-2502.13064"/></url>
<url><loc>https://scifaro.com/en/abs/songgen-a-single-stage-auto-regressive-transformer-for-text-to-song-generation-2502.13128</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songgen-a-single-stage-auto-regressive-transformer-for-text-to-song-generation-2502.13128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songgen-a-single-stage-auto-regressive-transformer-for-text-to-song-generation-2502.13128"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-cp-unet-framework-for-denoising-das-data-with-decay-noise-2502.13395</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-cp-unet-framework-for-denoising-das-data-with-decay-noise-2502.13395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-cp-unet-framework-for-denoising-das-data-with-decay-noise-2502.13395"/></url>
<url><loc>https://scifaro.com/en/abs/mats-an-audio-language-model-under-text-only-supervision-2502.13433</loc><lastmod>2026-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mats-an-audio-language-model-under-text-only-supervision-2502.13433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mats-an-audio-language-model-under-text-only-supervision-2502.13433"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-classification-of-bird-vocalizations-2502.13440</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-classification-of-bird-vocalizations-2502.13440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-classification-of-bird-vocalizations-2502.13440"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-classification-of-insect-species-using-machine-learning-models-cicada-beetle-termite-and-cricket-2502.13893</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-classification-of-insect-species-using-machine-learning-models-cicada-beetle-termite-and-cricket-2502.13893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-classification-of-insect-species-using-machine-learning-models-cicada-beetle-termite-and-cricket-2502.13893"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-application-of-visibility-graphs-in-the-spectral-domain-for-speaker-recognition-2502.14110</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-application-of-visibility-graphs-in-the-spectral-domain-for-speaker-recognition-2502.14110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-application-of-visibility-graphs-in-the-spectral-domain-for-speaker-recognition-2502.14110"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-black-box-and-gray-box-modeling-of-nonlinear-audio-effects-2502.14405</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-black-box-and-gray-box-modeling-of-nonlinear-audio-effects-2502.14405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-black-box-and-gray-box-modeling-of-nonlinear-audio-effects-2502.14405"/></url>
<url><loc>https://scifaro.com/en/abs/atri-mitigating-multilingual-audio-text-retrieval-inconsistencies-by-reducing-data-distribution-errors-2502.14627</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atri-mitigating-multilingual-audio-text-retrieval-inconsistencies-by-reducing-data-distribution-errors-2502.14627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atri-mitigating-multilingual-audio-text-retrieval-inconsistencies-by-reducing-data-distribution-errors-2502.14627"/></url>
<url><loc>https://scifaro.com/en/abs/chunkformer-masked-chunking-conformer-for-long-form-speech-transcription-2502.14673</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chunkformer-masked-chunking-conformer-for-long-form-speech-transcription-2502.14673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chunkformer-masked-chunking-conformer-for-long-form-speech-transcription-2502.14673"/></url>
<url><loc>https://scifaro.com/en/abs/segaug-ctc-aligned-segmented-augmentation-for-robust-rnn-transducer-based-speech-recognition-2502.14685</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segaug-ctc-aligned-segmented-augmentation-for-robust-rnn-transducer-based-speech-recognition-2502.14685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segaug-ctc-aligned-segmented-augmentation-for-robust-rnn-transducer-based-speech-recognition-2502.14685"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-imperfect-detecting-audio-deepfakes-through-acoustic-prosodic-analysis-2502.14726</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-imperfect-detecting-audio-deepfakes-through-acoustic-prosodic-analysis-2502.14726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-imperfect-detecting-audio-deepfakes-through-acoustic-prosodic-analysis-2502.14726"/></url>
<url><loc>https://scifaro.com/en/abs/wavrag-audio-integrated-retrieval-augmented-generation-for-spoken-dialogue-models-2502.14727</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavrag-audio-integrated-retrieval-augmented-generation-for-spoken-dialogue-models-2502.14727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavrag-audio-integrated-retrieval-augmented-generation-for-spoken-dialogue-models-2502.14727"/></url>
<url><loc>https://scifaro.com/en/abs/fundamental-survey-on-neuromorphic-based-audio-classification-2502.15056</loc><lastmod>2025-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fundamental-survey-on-neuromorphic-based-audio-classification-2502.15056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fundamental-survey-on-neuromorphic-based-audio-classification-2502.15056"/></url>
<url><loc>https://scifaro.com/en/abs/improving-streaming-speech-recognition-with-time-shifted-contextual-attention-and-dynamic-right-context-masking-2502.15158</loc><lastmod>2025-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-streaming-speech-recognition-with-time-shifted-contextual-attention-and-dynamic-right-context-masking-2502.15158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-streaming-speech-recognition-with-time-shifted-contextual-attention-and-dynamic-right-context-masking-2502.15158"/></url>
<url><loc>https://scifaro.com/en/abs/offload-rethinking-by-cloud-assistance-for-efficient-environmental-sound-recognition-on-lpwans-2502.15285</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/offload-rethinking-by-cloud-assistance-for-efficient-environmental-sound-recognition-on-lpwans-2502.15285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/offload-rethinking-by-cloud-assistance-for-efficient-environmental-sound-recognition-on-lpwans-2502.15285"/></url>
<url><loc>https://scifaro.com/en/abs/kad-no-more-fad-an-effective-and-efficient-evaluation-metric-for-audio-generation-2502.15602</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kad-no-more-fad-an-effective-and-efficient-evaluation-metric-for-audio-generation-2502.15602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kad-no-more-fad-an-effective-and-efficient-evaluation-metric-for-audio-generation-2502.15602"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-machine-learning-for-bowel-sound-pattern-classification-from-tabular-features-to-pretrained-models-2502.15607</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-machine-learning-for-bowel-sound-pattern-classification-from-tabular-features-to-pretrained-models-2502.15607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-machine-learning-for-bowel-sound-pattern-classification-from-tabular-features-to-pretrained-models-2502.15607"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-enhancement-by-cross-and-sub-band-processing-with-state-space-model-2502.16207</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-enhancement-by-cross-and-sub-band-processing-with-state-space-model-2502.16207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-enhancement-by-cross-and-sub-band-processing-with-state-space-model-2502.16207"/></url>
<url><loc>https://scifaro.com/en/abs/audio-flan-a-preliminary-release-2502.16584</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-flan-a-preliminary-release-2502.16584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-flan-a-preliminary-release-2502.16584"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-extraction-through-comparing-noisy-positive-and-negative-audio-enrollments-2502.16611</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-extraction-through-comparing-noisy-positive-and-negative-audio-enrollments-2502.16611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-extraction-through-comparing-noisy-positive-and-negative-audio-enrollments-2502.16611"/></url>
<url><loc>https://scifaro.com/en/abs/aad-llm-neural-attention-driven-auditory-scene-understanding-2502.16794</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aad-llm-neural-attention-driven-auditory-scene-understanding-2502.16794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aad-llm-neural-attention-driven-auditory-scene-understanding-2502.16794"/></url>
<url><loc>https://scifaro.com/en/abs/enact-heart-ensemble-based-assessment-using-cnn-and-transformer-on-heart-sounds-2502.16914</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enact-heart-ensemble-based-assessment-using-cnn-and-transformer-on-heart-sounds-2502.16914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enact-heart-ensemble-based-assessment-using-cnn-and-transformer-on-heart-sounds-2502.16914"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-contrastive-learning-from-weakly-labeled-audio-segments-for-musical-version-matching-2502.16936</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-contrastive-learning-from-weakly-labeled-audio-segments-for-musical-version-matching-2502.16936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-contrastive-learning-from-weakly-labeled-audio-segments-for-musical-version-matching-2502.16936"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-and-sparse-model-merging-for-multi-lingual-speech-recognition-and-translation-2502.17380</loc><lastmod>2025-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-and-sparse-model-merging-for-multi-lingual-speech-recognition-and-translation-2502.17380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-and-sparse-model-merging-for-multi-lingual-speech-recognition-and-translation-2502.17380"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-noise-masking-with-music-through-deep-spectral-envelope-shaping-2502.17527</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-noise-masking-with-music-through-deep-spectral-envelope-shaping-2502.17527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-noise-masking-with-music-through-deep-spectral-envelope-shaping-2502.17527"/></url>
<url><loc>https://scifaro.com/en/abs/vanpy-voice-analysis-framework-2502.17579</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vanpy-voice-analysis-framework-2502.17579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vanpy-voice-analysis-framework-2502.17579"/></url>
<url><loc>https://scifaro.com/en/abs/the-gigamidi-dataset-with-features-for-expressive-music-performance-detection-2502.17726</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-gigamidi-dataset-with-features-for-expressive-music-performance-detection-2502.17726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-gigamidi-dataset-with-features-for-expressive-music-performance-detection-2502.17726"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-quality-through-the-integration-of-bgru-and-transformer-architectures-2502.17911</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-quality-through-the-integration-of-bgru-and-transformer-architectures-2502.17911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-quality-through-the-integration-of-bgru-and-transformer-architectures-2502.17911"/></url>
<url><loc>https://scifaro.com/en/abs/notagen-advancing-musicality-in-symbolic-music-generation-with-large-language-model-training-paradigms-2502.18008</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/notagen-advancing-musicality-in-symbolic-music-generation-with-large-language-model-training-paradigms-2502.18008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/notagen-advancing-musicality-in-symbolic-music-generation-with-large-language-model-training-paradigms-2502.18008"/></url>
<url><loc>https://scifaro.com/en/abs/determined-blind-source-separation-with-sinkhorn-divergence-based-optimal-allocation-of-the-source-power-2502.18182</loc><lastmod>2025-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/determined-blind-source-separation-with-sinkhorn-divergence-based-optimal-allocation-of-the-source-power-2502.18182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/determined-blind-source-separation-with-sinkhorn-divergence-based-optimal-allocation-of-the-source-power-2502.18182"/></url>
<url><loc>https://scifaro.com/en/abs/steering-language-model-to-stable-speech-emotion-recognition-via-contextual-perception-and-chain-of-thought-2502.18186</loc><lastmod>2025-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steering-language-model-to-stable-speech-emotion-recognition-via-contextual-perception-and-chain-of-thought-2502.18186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steering-language-model-to-stable-speech-emotion-recognition-via-contextual-perception-and-chain-of-thought-2502.18186"/></url>
<url><loc>https://scifaro.com/en/abs/from-vision-to-sound-advancing-audio-anomaly-detection-with-vision-based-algorithms-2502.18328</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-vision-to-sound-advancing-audio-anomaly-detection-with-vision-based-algorithms-2502.18328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-vision-to-sound-advancing-audio-anomaly-detection-with-vision-based-algorithms-2502.18328"/></url>
<url><loc>https://scifaro.com/en/abs/clip-tts-contrastive-text-content-and-mel-spectrogram-a-high-quality-text-to-speech-method-based-on-contextual-semantic-understanding-2502.18889</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clip-tts-contrastive-text-content-and-mel-spectrogram-a-high-quality-text-to-speech-method-based-on-contextual-semantic-understanding-2502.18889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clip-tts-contrastive-text-content-and-mel-spectrogram-a-high-quality-text-to-speech-method-based-on-contextual-semantic-understanding-2502.18889"/></url>
<url><loc>https://scifaro.com/en/abs/dualspec-text-to-spatial-audio-generation-via-dual-spectrogram-guided-diffusion-model-2502.18952</loc><lastmod>2025-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualspec-text-to-spatial-audio-generation-via-dual-spectrogram-guided-diffusion-model-2502.18952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualspec-text-to-spatial-audio-generation-via-dual-spectrogram-guided-diffusion-model-2502.18952"/></url>
<url><loc>https://scifaro.com/en/abs/filtro-adaptativo-y-modulo-de-grabacion-en-dispositivo-para-mejora-en-la-calidad-de-audicion-2502.19444</loc><lastmod>2025-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filtro-adaptativo-y-modulo-de-grabacion-en-dispositivo-para-mejora-en-la-calidad-de-audicion-2502.19444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filtro-adaptativo-y-modulo-de-grabacion-en-dispositivo-para-mejora-en-la-calidad-de-audicion-2502.19444"/></url>
<url><loc>https://scifaro.com/en/abs/does-your-voice-assistant-remember-analyzing-conversational-context-recall-and-utilization-in-voice-interaction-models-2502.19759</loc><lastmod>2025-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-your-voice-assistant-remember-analyzing-conversational-context-recall-and-utilization-in-voice-interaction-models-2502.19759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-your-voice-assistant-remember-analyzing-conversational-context-recall-and-utilization-in-voice-interaction-models-2502.19759"/></url>
<url><loc>https://scifaro.com/en/abs/diffcss-diverse-and-expressive-conversational-speech-synthesis-with-diffusion-models-2502.19924</loc><lastmod>2025-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffcss-diverse-and-expressive-conversational-speech-synthesis-with-diffusion-models-2502.19924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffcss-diverse-and-expressive-conversational-speech-synthesis-with-diffusion-models-2502.19924"/></url>
<url><loc>https://scifaro.com/en/abs/dgfm-full-body-dance-generation-driven-by-music-foundation-models-2502.20176</loc><lastmod>2025-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dgfm-full-body-dance-generation-driven-by-music-foundation-models-2502.20176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dgfm-full-body-dance-generation-driven-by-music-foundation-models-2502.20176"/></url>
<url><loc>https://scifaro.com/en/abs/din-cts-low-complexity-depthwise-inception-neural-network-with-contrastive-training-strategy-for-deepfake-speech-detection-2502.20225</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/din-cts-low-complexity-depthwise-inception-neural-network-with-contrastive-training-strategy-for-deepfake-speech-detection-2502.20225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/din-cts-low-complexity-depthwise-inception-neural-network-with-contrastive-training-strategy-for-deepfake-speech-detection-2502.20225"/></url>
<url><loc>https://scifaro.com/en/abs/on-adversarial-attacks-in-acoustic-drone-localization-2502.20325</loc><lastmod>2026-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-adversarial-attacks-in-acoustic-drone-localization-2502.20325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-adversarial-attacks-in-acoustic-drone-localization-2502.20325"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-detection-and-temporal-localization-of-whale-calls-in-long-duration-bioacoustic-data-2502.20838</loc><lastmod>2026-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-detection-and-temporal-localization-of-whale-calls-in-long-duration-bioacoustic-data-2502.20838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-detection-and-temporal-localization-of-whale-calls-in-long-duration-bioacoustic-data-2502.20838"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-filtering-of-cross-spectral-matrices-using-generative-adversarial-networks-2502.21097</loc><lastmod>2025-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-filtering-of-cross-spectral-matrices-using-generative-adversarial-networks-2502.21097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-filtering-of-cross-spectral-matrices-using-generative-adversarial-networks-2502.21097"/></url>
<url><loc>https://scifaro.com/en/abs/inspiremusic-integrating-super-resolution-and-large-language-model-for-high-fidelity-long-form-music-generation-2503.00084</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inspiremusic-integrating-super-resolution-and-large-language-model-for-high-fidelity-long-form-music-generation-2503.00084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inspiremusic-integrating-super-resolution-and-large-language-model-for-high-fidelity-long-form-music-generation-2503.00084"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-data-enables-context-aware-bioacoustic-sound-event-detection-2503.00296</loc><lastmod>2025-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-data-enables-context-aware-bioacoustic-sound-event-detection-2503.00296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-data-enables-context-aware-bioacoustic-sound-event-detection-2503.00296"/></url>
<url><loc>https://scifaro.com/en/abs/language-model-mapping-in-multimodal-music-learning-a-grand-challenge-proposal-2503.00427</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-model-mapping-in-multimodal-music-learning-a-grand-challenge-proposal-2503.00427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-model-mapping-in-multimodal-music-learning-a-grand-challenge-proposal-2503.00427"/></url>
<url><loc>https://scifaro.com/en/abs/podagent-a-comprehensive-framework-for-podcast-generation-2503.00455</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/podagent-a-comprehensive-framework-for-podcast-generation-2503.00455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/podagent-a-comprehensive-framework-for-podcast-generation-2503.00455"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-anomaly-detection-on-uam-propeller-defect-with-acoustic-dataset-for-crack-of-drone-propeller-adcp-2503.00790</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-on-uam-propeller-defect-with-acoustic-dataset-for-crack-of-drone-propeller-adcp-2503.00790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-on-uam-propeller-defect-with-acoustic-dataset-for-crack-of-drone-propeller-adcp-2503.00790"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-vulnerabilities-in-speech-translation-systems-through-targeted-adversarial-attacks-2503.00957</loc><lastmod>2025-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-vulnerabilities-in-speech-translation-systems-through-targeted-adversarial-attacks-2503.00957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-vulnerabilities-in-speech-translation-systems-through-targeted-adversarial-attacks-2503.00957"/></url>
<url><loc>https://scifaro.com/en/abs/voice-cloning-for-dysarthric-speech-synthesis-addressing-data-scarcity-in-speech-language-pathology-2503.01266</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-cloning-for-dysarthric-speech-synthesis-addressing-data-scarcity-in-speech-language-pathology-2503.01266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-cloning-for-dysarthric-speech-synthesis-addressing-data-scarcity-in-speech-language-pathology-2503.01266"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-piano-transcription-based-on-consistent-onset-and-offset-decoding-with-sustain-pedal-detection-2503.01362</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-piano-transcription-based-on-consistent-onset-and-offset-decoding-with-sustain-pedal-detection-2503.01362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-piano-transcription-based-on-consistent-onset-and-offset-decoding-with-sustain-pedal-detection-2503.01362"/></url>
<url><loc>https://scifaro.com/en/abs/flowdec-a-flow-based-full-band-general-audio-codec-with-high-perceptual-quality-2503.01485</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowdec-a-flow-based-full-band-general-audio-codec-with-high-perceptual-quality-2503.01485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowdec-a-flow-based-full-band-general-audio-codec-with-high-perceptual-quality-2503.01485"/></url>
<url><loc>https://scifaro.com/en/abs/spark-tts-an-efficient-llm-based-text-to-speech-model-with-single-stream-decoupled-speech-tokens-2503.01710</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spark-tts-an-efficient-llm-based-text-to-speech-model-with-single-stream-decoupled-speech-tokens-2503.01710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spark-tts-an-efficient-llm-based-text-to-speech-model-with-single-stream-decoupled-speech-tokens-2503.01710"/></url>
<url><loc>https://scifaro.com/en/abs/audio-reasoner-improving-reasoning-capability-in-large-audio-language-models-2503.02318</loc><lastmod>2025-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-reasoner-improving-reasoning-capability-in-large-audio-language-models-2503.02318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-reasoner-improving-reasoning-capability-in-large-audio-language-models-2503.02318"/></url>
<url><loc>https://scifaro.com/en/abs/robust-detection-of-overlapping-bioacoustic-sound-events-2503.02389</loc><lastmod>2025-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-detection-of-overlapping-bioacoustic-sound-events-2503.02389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-detection-of-overlapping-bioacoustic-sound-events-2503.02389"/></url>
<url><loc>https://scifaro.com/en/abs/aggregation-strategies-for-efficient-annotation-of-bioacoustic-sound-events-using-active-learning-2503.02422</loc><lastmod>2025-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aggregation-strategies-for-efficient-annotation-of-bioacoustic-sound-events-using-active-learning-2503.02422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aggregation-strategies-for-efficient-annotation-of-bioacoustic-sound-events-using-active-learning-2503.02422"/></url>
<url><loc>https://scifaro.com/en/abs/as-good-as-it-kan-get-high-fidelity-audio-representation-2503.02585</loc><lastmod>2025-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/as-good-as-it-kan-get-high-fidelity-audio-representation-2503.02585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/as-good-as-it-kan-get-high-fidelity-audio-representation-2503.02585"/></url>
<url><loc>https://scifaro.com/en/abs/inserter-speech-instruction-following-with-unsupervised-interleaved-pre-training-2503.02769</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inserter-speech-instruction-following-with-unsupervised-interleaved-pre-training-2503.02769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inserter-speech-instruction-following-with-unsupervised-interleaved-pre-training-2503.02769"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-symphony-integrating-taste-and-sound-through-generative-ai-2503.02823</loc><lastmod>2025-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-symphony-integrating-taste-and-sound-through-generative-ai-2503.02823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-symphony-integrating-taste-and-sound-through-generative-ai-2503.02823"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-whisper-for-inclusive-prosodic-stress-analysis-2503.02907</loc><lastmod>2025-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-whisper-for-inclusive-prosodic-stress-analysis-2503.02907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-whisper-for-inclusive-prosodic-stress-analysis-2503.02907"/></url>
<url><loc>https://scifaro.com/en/abs/lead-instrument-detection-from-multitrack-music-2503.03232</loc><lastmod>2025-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lead-instrument-detection-from-multitrack-music-2503.03232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lead-instrument-detection-from-multitrack-music-2503.03232"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-finetuning-for-dimensional-speech-emotion-recognition-in-the-age-of-transformers-2503.03756</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-finetuning-for-dimensional-speech-emotion-recognition-in-the-age-of-transformers-2503.03756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-finetuning-for-dimensional-speech-emotion-recognition-in-the-age-of-transformers-2503.03756"/></url>
<url><loc>https://scifaro.com/en/abs/voicegrpo-modern-moe-transformers-with-group-relative-policy-optimization-grpo-for-ai-voice-health-care-applications-on-voice-pathology-detection-2503.03797</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicegrpo-modern-moe-transformers-with-group-relative-policy-optimization-grpo-for-ai-voice-health-care-applications-on-voice-pathology-detection-2503.03797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicegrpo-modern-moe-transformers-with-group-relative-policy-optimization-grpo-for-ai-voice-health-care-applications-on-voice-pathology-detection-2503.03797"/></url>
<url><loc>https://scifaro.com/en/abs/audio-flamingo-2-an-audio-language-model-with-long-audio-understanding-and-expert-reasoning-abilities-2503.03983</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-flamingo-2-an-audio-language-model-with-long-audio-understanding-and-expert-reasoning-abilities-2503.03983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-flamingo-2-an-audio-language-model-with-long-audio-understanding-and-expert-reasoning-abilities-2503.03983"/></url>
<url><loc>https://scifaro.com/en/abs/tail-text-audio-incremental-learning-2503.04258</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tail-text-audio-incremental-learning-2503.04258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tail-text-audio-incremental-learning-2503.04258"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-models-for-phoneme-recognition-applications-in-children-s-speech-for-reading-learning-2503.04710</loc><lastmod>2025-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-models-for-phoneme-recognition-applications-in-children-s-speech-for-reading-learning-2503.04710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-models-for-phoneme-recognition-applications-in-children-s-speech-for-reading-learning-2503.04710"/></url>
<url><loc>https://scifaro.com/en/abs/uniarray-unified-spectral-spatial-modeling-for-array-geometry-agnostic-speech-separation-2503.05110</loc><lastmod>2025-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniarray-unified-spectral-spatial-modeling-for-array-geometry-agnostic-speech-separation-2503.05110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniarray-unified-spectral-spatial-modeling-for-array-geometry-agnostic-speech-separation-2503.05110"/></url>
<url><loc>https://scifaro.com/en/abs/divise-direct-visual-input-speech-synthesis-preserving-speaker-characteristics-and-intelligibility-2503.05223</loc><lastmod>2025-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/divise-direct-visual-input-speech-synthesis-preserving-speaker-characteristics-and-intelligibility-2503.05223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/divise-direct-visual-input-speech-synthesis-preserving-speaker-characteristics-and-intelligibility-2503.05223"/></url>
<url><loc>https://scifaro.com/en/abs/bimodal-connection-attention-fusion-for-speech-emotion-recognition-2503.05858</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bimodal-connection-attention-fusion-for-speech-emotion-recognition-2503.05858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bimodal-connection-attention-fusion-for-speech-emotion-recognition-2503.05858"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-image-encoding-for-improved-voice-characteristic-detection-using-deep-convolutional-neural-networks-2503.05929</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-image-encoding-for-improved-voice-characteristic-detection-using-deep-convolutional-neural-networks-2503.05929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-image-encoding-for-improved-voice-characteristic-detection-using-deep-convolutional-neural-networks-2503.05929"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-expressive-personality-recognition-in-data-non-ideal-audiovisual-based-on-multi-scale-feature-enhancement-and-modal-augment-2503.06108</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-expressive-personality-recognition-in-data-non-ideal-audiovisual-based-on-multi-scale-feature-enhancement-and-modal-augment-2503.06108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-expressive-personality-recognition-in-data-non-ideal-audiovisual-based-on-multi-scale-feature-enhancement-and-modal-augment-2503.06108"/></url>
<url><loc>https://scifaro.com/en/abs/infant-cry-detection-using-causal-temporal-representation-2503.06247</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infant-cry-detection-using-causal-temporal-representation-2503.06247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infant-cry-detection-using-causal-temporal-representation-2503.06247"/></url>
<url><loc>https://scifaro.com/en/abs/accompaniment-prompt-adherence-a-measure-for-evaluating-music-accompaniment-systems-2503.06346</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accompaniment-prompt-adherence-a-measure-for-evaluating-music-accompaniment-systems-2503.06346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accompaniment-prompt-adherence-a-measure-for-evaluating-music-accompaniment-systems-2503.06346"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-score-follower-for-computer-accompaniment-of-polyphonic-musical-instruments-2503.06348</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-score-follower-for-computer-accompaniment-of-polyphonic-musical-instruments-2503.06348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-score-follower-for-computer-accompaniment-of-polyphonic-musical-instruments-2503.06348"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-bimodal-attention-fusion-for-speech-emotion-recognition-2503.06405</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-bimodal-attention-fusion-for-speech-emotion-recognition-2503.06405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-bimodal-attention-fusion-for-speech-emotion-recognition-2503.06405"/></url>
<url><loc>https://scifaro.com/en/abs/speech-audio-generation-from-dynamic-mri-via-a-knowledge-enhanced-conditional-variational-autoencoder-2503.06588</loc><lastmod>2025-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-audio-generation-from-dynamic-mri-via-a-knowledge-enhanced-conditional-variational-autoencoder-2503.06588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-audio-generation-from-dynamic-mri-via-a-knowledge-enhanced-conditional-variational-autoencoder-2503.06588"/></url>
<url><loc>https://scifaro.com/en/abs/synchronized-video-to-audio-generation-via-mel-quantization-continuum-decomposition-2503.06984</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synchronized-video-to-audio-generation-via-mel-quantization-continuum-decomposition-2503.06984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synchronized-video-to-audio-generation-via-mel-quantization-continuum-decomposition-2503.06984"/></url>
<url><loc>https://scifaro.com/en/abs/reelwave-multi-agentic-movie-sound-generation-through-multimodal-llm-conversation-2503.07217</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reelwave-multi-agentic-movie-sound-generation-through-multimodal-llm-conversation-2503.07217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reelwave-multi-agentic-movie-sound-generation-through-multimodal-llm-conversation-2503.07217"/></url>
<url><loc>https://scifaro.com/en/abs/boundary-regression-for-leitmotif-detection-in-music-audio-2503.07977</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boundary-regression-for-leitmotif-detection-in-music-audio-2503.07977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boundary-regression-for-leitmotif-detection-in-music-audio-2503.07977"/></url>
<url><loc>https://scifaro.com/en/abs/mellow-a-small-audio-language-model-for-reasoning-2503.08540</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mellow-a-small-audio-language-model-for-reasoning-2503.08540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mellow-a-small-audio-language-model-for-reasoning-2503.08540"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-speech-extraction-leveraging-textual-history-as-an-implicit-cue-for-target-speech-extraction-2503.08798</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-speech-extraction-leveraging-textual-history-as-an-implicit-cue-for-target-speech-extraction-2503.08798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-speech-extraction-leveraging-textual-history-as-an-implicit-cue-for-target-speech-extraction-2503.08798"/></url>
<url><loc>https://scifaro.com/en/abs/learning-control-of-neural-sound-effects-synthesis-from-physically-inspired-models-2503.08806</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-control-of-neural-sound-effects-synthesis-from-physically-inspired-models-2503.08806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-control-of-neural-sound-effects-synthesis-from-physically-inspired-models-2503.08806"/></url>
<url><loc>https://scifaro.com/en/abs/control-surfaces-using-the-commodore-64-and-analog-synthesizer-to-expand-musical-boundaries-2503.09053</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/control-surfaces-using-the-commodore-64-and-analog-synthesizer-to-expand-musical-boundaries-2503.09053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/control-surfaces-using-the-commodore-64-and-analog-synthesizer-to-expand-musical-boundaries-2503.09053"/></url>
<url><loc>https://scifaro.com/en/abs/zero-to-16383-through-the-wire-transmitting-high-resolution-midi-with-websockets-and-the-browser-2503.09055</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-to-16383-through-the-wire-transmitting-high-resolution-midi-with-websockets-and-the-browser-2503.09055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-to-16383-through-the-wire-transmitting-high-resolution-midi-with-websockets-and-the-browser-2503.09055"/></url>
<url><loc>https://scifaro.com/en/abs/quantization-for-openai-s-whisper-models-a-comparative-analysis-2503.09905</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantization-for-openai-s-whisper-models-a-comparative-analysis-2503.09905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantization-for-openai-s-whisper-models-a-comparative-analysis-2503.09905"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-adapter-tuning-for-joint-singing-voice-beat-and-downbeat-tracking-with-self-supervised-learning-features-2503.10086</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-adapter-tuning-for-joint-singing-voice-beat-and-downbeat-tracking-with-self-supervised-learning-features-2503.10086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-adapter-tuning-for-joint-singing-voice-beat-and-downbeat-tracking-with-self-supervised-learning-features-2503.10086"/></url>
<url><loc>https://scifaro.com/en/abs/macs-multi-source-audio-to-image-generation-with-contextual-significance-and-semantic-alignment-2503.10287</loc><lastmod>2025-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/macs-multi-source-audio-to-image-generation-with-contextual-significance-and-semantic-alignment-2503.10287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/macs-multi-source-audio-to-image-generation-with-contextual-significance-and-semantic-alignment-2503.10287"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-speaker-identification-leveraging-pre-trained-multilingual-transformers-for-robust-speaker-embeddings-2503.10446</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-speaker-identification-leveraging-pre-trained-multilingual-transformers-for-robust-speaker-embeddings-2503.10446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-speaker-identification-leveraging-pre-trained-multilingual-transformers-for-robust-speaker-embeddings-2503.10446"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-learning-for-music-to-music-video-description-generation-2503.11190</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-learning-for-music-to-music-video-description-generation-2503.11190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-learning-for-music-to-music-video-description-generation-2503.11190"/></url>
<url><loc>https://scifaro.com/en/abs/reinforcement-learning-outperforms-supervised-fine-tuning-a-case-study-on-audio-question-answering-2503.11197</loc><lastmod>2025-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reinforcement-learning-outperforms-supervised-fine-tuning-a-case-study-on-audio-question-answering-2503.11197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reinforcement-learning-outperforms-supervised-fine-tuning-a-case-study-on-audio-question-answering-2503.11197"/></url>
<url><loc>https://scifaro.com/en/abs/spike-encoding-for-environmental-sound-a-comparative-benchmark-2503.11206</loc><lastmod>2025-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spike-encoding-for-environmental-sound-a-comparative-benchmark-2503.11206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spike-encoding-for-environmental-sound-a-comparative-benchmark-2503.11206"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-potential-of-large-multimodal-models-as-effective-alternatives-for-pronunciation-assessment-2503.11229</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-potential-of-large-multimodal-models-as-effective-alternatives-for-pronunciation-assessment-2503.11229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-potential-of-large-multimodal-models-as-effective-alternatives-for-pronunciation-assessment-2503.11229"/></url>
<url><loc>https://scifaro.com/en/abs/creating-a-good-teacher-for-knowledge-distillation-in-acoustic-scene-classification-2503.11363</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-a-good-teacher-for-knowledge-distillation-in-acoustic-scene-classification-2503.11363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-a-good-teacher-for-knowledge-distillation-in-acoustic-scene-classification-2503.11363"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-performance-complexity-trade-offs-in-sound-event-detection-models-2503.11373</loc><lastmod>2025-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-performance-complexity-trade-offs-in-sound-event-detection-models-2503.11373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-performance-complexity-trade-offs-in-sound-event-detection-models-2503.11373"/></url>
<url><loc>https://scifaro.com/en/abs/designing-neural-synthesizers-for-low-latency-interaction-2503.11562</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/designing-neural-synthesizers-for-low-latency-interaction-2503.11562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/designing-neural-synthesizers-for-low-latency-interaction-2503.11562"/></url>
<url><loc>https://scifaro.com/en/abs/are-deep-speech-denoising-models-robust-to-adversarial-noise-2503.11627</loc><lastmod>2026-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-deep-speech-denoising-models-robust-to-adversarial-noise-2503.11627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-deep-speech-denoising-models-robust-to-adversarial-noise-2503.11627"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-music-data-processing-and-generation-2503.11896</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-music-data-processing-and-generation-2503.11896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-music-data-processing-and-generation-2503.11896"/></url>
<url><loc>https://scifaro.com/en/abs/computational-extraction-of-intonation-and-tuning-systems-from-multiple-microtonal-monophonic-vocal-recordings-with-diverse-modes-2503.11956</loc><lastmod>2025-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computational-extraction-of-intonation-and-tuning-systems-from-multiple-microtonal-monophonic-vocal-recordings-with-diverse-modes-2503.11956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computational-extraction-of-intonation-and-tuning-systems-from-multiple-microtonal-monophonic-vocal-recordings-with-diverse-modes-2503.11956"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-enhanced-acoustic-pre-training-and-acoustic-disentangled-prosody-adapting-for-movie-dubbing-2503.12042</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-enhanced-acoustic-pre-training-and-acoustic-disentangled-prosody-adapting-for-movie-dubbing-2503.12042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-enhanced-acoustic-pre-training-and-acoustic-disentangled-prosody-adapting-for-movie-dubbing-2503.12042"/></url>
<url><loc>https://scifaro.com/en/abs/universal-speech-token-learning-via-low-bitrate-neural-codec-and-pretrained-representations-2503.12115</loc><lastmod>2025-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-speech-token-learning-via-low-bitrate-neural-codec-and-pretrained-representations-2503.12115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-speech-token-learning-via-low-bitrate-neural-codec-and-pretrained-representations-2503.12115"/></url>
<url><loc>https://scifaro.com/en/abs/serenade-a-singing-style-conversion-framework-based-on-audio-infilling-2503.12388</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/serenade-a-singing-style-conversion-framework-based-on-audio-infilling-2503.12388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/serenade-a-singing-style-conversion-framework-based-on-audio-infilling-2503.12388"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-close-loop-predictive-coding-framework-for-auditory-working-memory-2503.12506</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-close-loop-predictive-coding-framework-for-auditory-working-memory-2503.12506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-close-loop-predictive-coding-framework-for-auditory-working-memory-2503.12506"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-two-step-training-scheme-for-domain-invariant-speech-separation-2503.12589</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-two-step-training-scheme-for-domain-invariant-speech-separation-2503.12589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-two-step-training-scheme-for-domain-invariant-speech-separation-2503.12589"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-derivation-and-elimination-audio-visual-segmentation-with-enhanced-audio-semantics-2503.12840</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-derivation-and-elimination-audio-visual-segmentation-with-enhanced-audio-semantics-2503.12840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-derivation-and-elimination-audio-visual-segmentation-with-enhanced-audio-semantics-2503.12840"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-visual-segmentation-via-audio-guided-visual-convergent-alignment-2503.12847</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-visual-segmentation-via-audio-guided-visual-convergent-alignment-2503.12847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-visual-segmentation-via-audio-guided-visual-convergent-alignment-2503.12847"/></url>
<url><loc>https://scifaro.com/en/abs/insectset459-an-open-dataset-of-insect-sounds-for-bioacoustic-machine-learning-2503.15074</loc><lastmod>2025-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/insectset459-an-open-dataset-of-insect-sounds-for-bioacoustic-machine-learning-2503.15074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/insectset459-an-open-dataset-of-insect-sounds-for-bioacoustic-machine-learning-2503.15074"/></url>
<url><loc>https://scifaro.com/en/abs/a-bird-song-detector-for-improving-bird-identification-through-deep-learning-a-case-study-from-do-nana-2503.15576</loc><lastmod>2025-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bird-song-detector-for-improving-bird-identification-through-deep-learning-a-case-study-from-do-nana-2503.15576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bird-song-detector-for-improving-bird-identification-through-deep-learning-a-case-study-from-do-nana-2503.15576"/></url>
<url><loc>https://scifaro.com/en/abs/aligning-text-to-music-evaluation-with-human-preferences-2503.16669</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aligning-text-to-music-evaluation-with-human-preferences-2503.16669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aligning-text-to-music-evaluation-with-human-preferences-2503.16669"/></url>
<url><loc>https://scifaro.com/en/abs/wavefm-a-high-fidelity-and-efficient-vocoder-based-on-flow-matching-2503.16689</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavefm-a-high-fidelity-and-efficient-vocoder-based-on-flow-matching-2503.16689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavefm-a-high-fidelity-and-efficient-vocoder-based-on-flow-matching-2503.16689"/></url>
<url><loc>https://scifaro.com/en/abs/caarma-class-augmentation-with-adversarial-mixup-regularization-2503.16718</loc><lastmod>2026-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/caarma-class-augmentation-with-adversarial-mixup-regularization-2503.16718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/caarma-class-augmentation-with-adversarial-mixup-regularization-2503.16718"/></url>
<url><loc>https://scifaro.com/en/abs/the-model-hears-you-audio-language-model-deployments-should-consider-the-principle-of-least-privilege-2503.16833</loc><lastmod>2025-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-model-hears-you-audio-language-model-deployments-should-consider-the-principle-of-least-privilege-2503.16833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-model-hears-you-audio-language-model-deployments-should-consider-the-principle-of-least-privilege-2503.16833"/></url>
<url><loc>https://scifaro.com/en/abs/improving-acoustic-scene-classification-with-city-features-2503.16862</loc><lastmod>2025-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-acoustic-scene-classification-with-city-features-2503.16862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-acoustic-scene-classification-with-city-features-2503.16862"/></url>
<url><loc>https://scifaro.com/en/abs/stftcodec-high-fidelity-audio-compression-through-time-frequency-domain-representation-2503.16989</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stftcodec-high-fidelity-audio-compression-through-time-frequency-domain-representation-2503.16989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stftcodec-high-fidelity-audio-compression-through-time-frequency-domain-representation-2503.16989"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-audio-classification-via-modal-decision-tree-learning-2503.17018</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-audio-classification-via-modal-decision-tree-learning-2503.17018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-audio-classification-via-modal-decision-tree-learning-2503.17018"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-stream-streaming-speech-enhancement-with-generative-adversarial-networks-2503.17141</loc><lastmod>2025-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-stream-streaming-speech-enhancement-with-generative-adversarial-networks-2503.17141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-stream-streaming-speech-enhancement-with-generative-adversarial-networks-2503.17141"/></url>
<url><loc>https://scifaro.com/en/abs/learning-separated-representations-for-instrument-based-music-similarity-2503.17281</loc><lastmod>2025-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-separated-representations-for-instrument-based-music-similarity-2503.17281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-separated-representations-for-instrument-based-music-similarity-2503.17281"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-audio-representations-for-vibration-based-crowd-monitoring-in-stadiums-2503.17646</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-audio-representations-for-vibration-based-crowd-monitoring-in-stadiums-2503.17646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-audio-representations-for-vibration-based-crowd-monitoring-in-stadiums-2503.17646"/></url>
<url><loc>https://scifaro.com/en/abs/lzmidi-compression-based-symbolic-music-generation-2503.17654</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lzmidi-compression-based-symbolic-music-generation-2503.17654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lzmidi-compression-based-symbolic-music-generation-2503.17654"/></url>
<url><loc>https://scifaro.com/en/abs/gsound-sir-a-spatial-impulse-response-ray-tracing-and-high-order-ambisonic-auralization-python-toolkit-2503.17866</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gsound-sir-a-spatial-impulse-response-ray-tracing-and-high-order-ambisonic-auralization-python-toolkit-2503.17866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gsound-sir-a-spatial-impulse-response-ray-tracing-and-high-order-ambisonic-auralization-python-toolkit-2503.17866"/></url>
<url><loc>https://scifaro.com/en/abs/elevating-robust-multi-talker-asr-by-decoupling-speaker-separation-and-speech-recognition-2503.17886</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/elevating-robust-multi-talker-asr-by-decoupling-speaker-separation-and-speech-recognition-2503.17886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/elevating-robust-multi-talker-asr-by-decoupling-speaker-separation-and-speech-recognition-2503.17886"/></url>
<url><loc>https://scifaro.com/en/abs/anomaly-detection-and-localization-for-speech-deepfakes-via-feature-pyramid-matching-2503.18032</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomaly-detection-and-localization-for-speech-deepfakes-via-feature-pyramid-matching-2503.18032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomaly-detection-and-localization-for-speech-deepfakes-via-feature-pyramid-matching-2503.18032"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-based-animal-emotion-classification-using-audio-signals-2503.18138</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-based-animal-emotion-classification-using-audio-signals-2503.18138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-based-animal-emotion-classification-using-audio-signals-2503.18138"/></url>
<url><loc>https://scifaro.com/en/abs/music-similarity-representation-learning-focusing-on-individual-instruments-with-source-separation-and-human-preference-2503.18486</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-similarity-representation-learning-focusing-on-individual-instruments-with-source-separation-and-human-preference-2503.18486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-similarity-representation-learning-focusing-on-individual-instruments-with-source-separation-and-human-preference-2503.18486"/></url>
<url><loc>https://scifaro.com/en/abs/wireless-hearables-with-programmable-speech-ai-accelerators-2503.18698</loc><lastmod>2025-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wireless-hearables-with-programmable-speech-ai-accelerators-2503.18698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wireless-hearables-with-programmable-speech-ai-accelerators-2503.18698"/></url>
<url><loc>https://scifaro.com/en/abs/a-reliable-and-efficient-detection-pipeline-for-rodent-ultrasonic-vocalizations-2503.18928</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-reliable-and-efficient-detection-pipeline-for-rodent-ultrasonic-vocalizations-2503.18928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-reliable-and-efficient-detection-pipeline-for-rodent-ultrasonic-vocalizations-2503.18928"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-eeg-and-speech-for-emotion-recognition-a-two-step-joint-learning-framework-for-handling-missing-eeg-data-during-inference-2503.18964</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-eeg-and-speech-for-emotion-recognition-a-two-step-joint-learning-framework-for-handling-missing-eeg-data-during-inference-2503.18964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-eeg-and-speech-for-emotion-recognition-a-two-step-joint-learning-framework-for-handling-missing-eeg-data-during-inference-2503.18964"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-the-transferability-of-audio-adversarial-examples-with-acoustic-representation-optimization-2503.19591</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-the-transferability-of-audio-adversarial-examples-with-acoustic-representation-optimization-2503.19591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-the-transferability-of-audio-adversarial-examples-with-acoustic-representation-optimization-2503.19591"/></url>
<url><loc>https://scifaro.com/en/abs/qincodec-neural-audio-compression-with-implicit-neural-codebooks-2503.19597</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qincodec-neural-audio-compression-with-implicit-neural-codebooks-2503.19597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qincodec-neural-audio-compression-with-implicit-neural-codebooks-2503.19597"/></url>
<url><loc>https://scifaro.com/en/abs/analyzable-chain-of-musical-thought-prompting-for-high-fidelity-music-generation-2503.19611</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzable-chain-of-musical-thought-prompting-for-high-fidelity-music-generation-2503.19611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzable-chain-of-musical-thought-prompting-for-high-fidelity-music-generation-2503.19611"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-speech-emotion-recognition-a-cnn-approach-utilizing-mel-spectrograms-2503.19677</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-speech-emotion-recognition-a-cnn-approach-utilizing-mel-spectrograms-2503.19677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-speech-emotion-recognition-a-cnn-approach-utilizing-mel-spectrograms-2503.19677"/></url>
<url><loc>https://scifaro.com/en/abs/fireredtts-1s-an-upgraded-streamable-foundation-text-to-speech-system-2503.20499</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fireredtts-1s-an-upgraded-streamable-foundation-text-to-speech-system-2503.20499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fireredtts-1s-an-upgraded-streamable-foundation-text-to-speech-system-2503.20499"/></url>
<url><loc>https://scifaro.com/en/abs/text-driven-voice-conversion-via-latent-state-space-modeling-2503.20999</loc><lastmod>2025-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-driven-voice-conversion-via-latent-state-space-modeling-2503.20999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-driven-voice-conversion-via-latent-state-space-modeling-2503.20999"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-recognition-accuracy-using-custom-language-models-with-the-vosk-toolkit-2503.21025</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-recognition-accuracy-using-custom-language-models-with-the-vosk-toolkit-2503.21025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-recognition-accuracy-using-custom-language-models-with-the-vosk-toolkit-2503.21025"/></url>
<url><loc>https://scifaro.com/en/abs/magnitude-phase-dual-path-speech-enhancement-network-based-on-self-supervised-embedding-and-perceptual-contrast-stretch-boosting-2503.21571</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/magnitude-phase-dual-path-speech-enhancement-network-based-on-self-supervised-embedding-and-perceptual-contrast-stretch-boosting-2503.21571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/magnitude-phase-dual-path-speech-enhancement-network-based-on-self-supervised-embedding-and-perceptual-contrast-stretch-boosting-2503.21571"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-label-propagation-a-model-size-dependent-performance-booster-for-audioset-tagging-2503.21826</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-label-propagation-a-model-size-dependent-performance-booster-for-audioset-tagging-2503.21826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-label-propagation-a-model-size-dependent-performance-booster-for-audioset-tagging-2503.21826"/></url>
<url><loc>https://scifaro.com/en/abs/tune-it-up-music-genre-transfer-and-prediction-2503.22008</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tune-it-up-music-genre-transfer-and-prediction-2503.22008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tune-it-up-music-genre-transfer-and-prediction-2503.22008"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-dance-to-music-generation-via-negative-conditioning-latent-diffusion-model-2503.22138</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-dance-to-music-generation-via-negative-conditioning-latent-diffusion-model-2503.22138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-dance-to-music-generation-via-negative-conditioning-latent-diffusion-model-2503.22138"/></url>
<url><loc>https://scifaro.com/en/abs/enhance-generation-quality-of-flow-matching-v2a-model-via-multi-step-cot-like-guidance-and-combined-preference-optimization-2503.22200</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhance-generation-quality-of-flow-matching-v2a-model-via-multi-step-cot-like-guidance-and-combined-preference-optimization-2503.22200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhance-generation-quality-of-flow-matching-v2a-model-via-multi-step-cot-like-guidance-and-combined-preference-optimization-2503.22200"/></url>
<url><loc>https://scifaro.com/en/abs/deepsound-v1-start-to-think-step-by-step-in-the-audio-generation-from-videos-2503.22208</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepsound-v1-start-to-think-step-by-step-in-the-audio-generation-from-videos-2503.22208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepsound-v1-start-to-think-step-by-step-in-the-audio-generation-from-videos-2503.22208"/></url>
<url><loc>https://scifaro.com/en/abs/cross-technology-generalization-in-synthesized-speech-detection-evaluating-ast-models-with-modern-voice-generators-2503.22503</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-technology-generalization-in-synthesized-speech-detection-evaluating-ast-models-with-modern-voice-generators-2503.22503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-technology-generalization-in-synthesized-speech-detection-evaluating-ast-models-with-modern-voice-generators-2503.22503"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-speech-emotion-with-label-variance-and-analyzing-performance-across-speakers-and-unseen-acoustic-conditions-2503.22711</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-speech-emotion-with-label-variance-and-analyzing-performance-across-speakers-and-unseen-acoustic-conditions-2503.22711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-speech-emotion-with-label-variance-and-analyzing-performance-across-speakers-and-unseen-acoustic-conditions-2503.22711"/></url>
<url><loc>https://scifaro.com/en/abs/coverage-guaranteed-speech-emotion-recognition-via-calibrated-uncertainty-adaptive-prediction-sets-2503.22712</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coverage-guaranteed-speech-emotion-recognition-via-calibrated-uncertainty-adaptive-prediction-sets-2503.22712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coverage-guaranteed-speech-emotion-recognition-via-calibrated-uncertainty-adaptive-prediction-sets-2503.22712"/></url>
<url><loc>https://scifaro.com/en/abs/dual-audio-centric-modality-coupling-for-talking-head-generation-2503.22728</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-audio-centric-modality-coupling-for-talking-head-generation-2503.22728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-audio-centric-modality-coupling-for-talking-head-generation-2503.22728"/></url>
<url><loc>https://scifaro.com/en/abs/teaching-llms-music-theory-with-in-context-learning-and-chain-of-thought-prompting-pedagogical-strategies-for-machines-2503.22853</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teaching-llms-music-theory-with-in-context-learning-and-chain-of-thought-prompting-pedagogical-strategies-for-machines-2503.22853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teaching-llms-music-theory-with-in-context-learning-and-chain-of-thought-prompting-pedagogical-strategies-for-machines-2503.22853"/></url>
<url><loc>https://scifaro.com/en/abs/crossmusim-a-cross-modal-framework-for-music-similarity-retrieval-with-llm-powered-text-description-sourcing-and-mining-2503.23128</loc><lastmod>2025-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossmusim-a-cross-modal-framework-for-music-similarity-retrieval-with-llm-powered-text-description-sourcing-and-mining-2503.23128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossmusim-a-cross-modal-framework-for-music-similarity-retrieval-with-llm-powered-text-description-sourcing-and-mining-2503.23128"/></url>
<url><loc>https://scifaro.com/en/abs/joint-source-environment-adaptation-of-data-driven-underwater-acoustic-source-ranging-based-on-model-uncertainty-2503.23258</loc><lastmod>2025-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-source-environment-adaptation-of-data-driven-underwater-acoustic-source-ranging-based-on-model-uncertainty-2503.23258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-source-environment-adaptation-of-data-driven-underwater-acoustic-source-ranging-based-on-model-uncertainty-2503.23258"/></url>
<url><loc>https://scifaro.com/en/abs/mismatch-robust-underwater-acoustic-localization-using-a-differentiable-modular-forward-model-2503.23260</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mismatch-robust-underwater-acoustic-localization-using-a-differentiable-modular-forward-model-2503.23260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mismatch-robust-underwater-acoustic-localization-using-a-differentiable-modular-forward-model-2503.23260"/></url>
<url><loc>https://scifaro.com/en/abs/joint-source-environment-adaptation-for-deep-learning-based-underwater-acoustic-source-ranging-2503.23262</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-source-environment-adaptation-for-deep-learning-based-underwater-acoustic-source-ranging-2503.23262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-source-environment-adaptation-for-deep-learning-based-underwater-acoustic-source-ranging-2503.23262"/></url>
<url><loc>https://scifaro.com/en/abs/hearfit-personalized-fitness-monitoring-via-audio-signals-on-smart-speakers-2503.23387</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearfit-personalized-fitness-monitoring-via-audio-signals-on-smart-speakers-2503.23387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearfit-personalized-fitness-monitoring-via-audio-signals-on-smart-speakers-2503.23387"/></url>
<url><loc>https://scifaro.com/en/abs/hearsmoking-smoking-detection-in-driving-environment-via-acoustic-sensing-on-smartphones-2503.23391</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearsmoking-smoking-detection-in-driving-environment-via-acoustic-sensing-on-smartphones-2503.23391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearsmoking-smoking-detection-in-driving-environment-via-acoustic-sensing-on-smartphones-2503.23391"/></url>
<url><loc>https://scifaro.com/en/abs/d3-guard-acoustic-based-drowsy-driving-detection-using-smartphones-2503.23393</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/d3-guard-acoustic-based-drowsy-driving-detection-using-smartphones-2503.23393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/d3-guard-acoustic-based-drowsy-driving-detection-using-smartphones-2503.23393"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-auditory-cognition-via-test-time-compute-in-audio-language-models-2503.23395</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-auditory-cognition-via-test-time-compute-in-audio-language-models-2503.23395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-auditory-cognition-via-test-time-compute-in-audio-language-models-2503.23395"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-the-pronunciation-of-tajweed-rules-based-on-dnn-as-a-step-towards-interactive-recitation-learning-2503.23470</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-the-pronunciation-of-tajweed-rules-based-on-dnn-as-a-step-towards-interactive-recitation-learning-2503.23470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-the-pronunciation-of-tajweed-rules-based-on-dnn-as-a-step-towards-interactive-recitation-learning-2503.23470"/></url>
<url><loc>https://scifaro.com/en/abs/unisep-universal-target-audio-separation-with-language-models-at-scale-2503.23762</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unisep-universal-target-audio-separation-with-language-models-at-scale-2503.23762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unisep-universal-target-audio-separation-with-language-models-at-scale-2503.23762"/></url>
<url><loc>https://scifaro.com/en/abs/music-information-retrieval-on-representative-mexican-folk-vocal-melodies-through-midi-feature-extraction-2503.24243</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-information-retrieval-on-representative-mexican-folk-vocal-melodies-through-midi-feature-extraction-2503.24243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-information-retrieval-on-representative-mexican-folk-vocal-melodies-through-midi-feature-extraction-2503.24243"/></url>
<url><loc>https://scifaro.com/en/abs/are-you-really-listening-boosting-perceptual-awareness-in-music-qa-benchmarks-2504.00369</loc><lastmod>2025-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-you-really-listening-boosting-perceptual-awareness-in-music-qa-benchmarks-2504.00369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-you-really-listening-boosting-perceptual-awareness-in-music-qa-benchmarks-2504.00369"/></url>
<url><loc>https://scifaro.com/en/abs/user-authentication-on-earable-devices-via-bone-conducted-occlusion-sounds-2504.00435</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/user-authentication-on-earable-devices-via-bone-conducted-occlusion-sounds-2504.00435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/user-authentication-on-earable-devices-via-bone-conducted-occlusion-sounds-2504.00435"/></url>
<url><loc>https://scifaro.com/en/abs/c-2-av-tse-context-and-confidence-aware-audio-visual-target-speaker-extraction-2504.00750</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/c-2-av-tse-context-and-confidence-aware-audio-visual-target-speaker-extraction-2504.00750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/c-2-av-tse-context-and-confidence-aware-audio-visual-target-speaker-extraction-2504.00750"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-music-generation-from-single-modal-cross-modal-and-multi-modal-perspectives-2504.00837</loc><lastmod>2026-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-music-generation-from-single-modal-cross-modal-and-multi-modal-perspectives-2504.00837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-music-generation-from-single-modal-cross-modal-and-multi-modal-perspectives-2504.00837"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-and-multi-accent-jailbreaking-of-audio-llms-2504.01094</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-and-multi-accent-jailbreaking-of-audio-llms-2504.01094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-and-multi-accent-jailbreaking-of-audio-llms-2504.01094"/></url>
<url><loc>https://scifaro.com/en/abs/token-pruning-in-audio-transformers-optimizing-performance-and-decoding-patch-importance-2504.01690</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/token-pruning-in-audio-transformers-optimizing-performance-and-decoding-patch-importance-2504.01690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/token-pruning-in-audio-transformers-optimizing-performance-and-decoding-patch-importance-2504.01690"/></url>
<url><loc>https://scifaro.com/en/abs/causal-self-supervised-pretrained-frontend-with-predictive-code-for-speech-separation-2504.02302</loc><lastmod>2025-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-self-supervised-pretrained-frontend-with-predictive-code-for-speech-separation-2504.02302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-self-supervised-pretrained-frontend-with-predictive-code-for-speech-separation-2504.02302"/></url>
<url><loc>https://scifaro.com/en/abs/evmic-event-based-non-contact-sound-recovery-from-effective-spatial-temporal-modeling-2504.02402</loc><lastmod>2025-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evmic-event-based-non-contact-sound-recovery-from-effective-spatial-temporal-modeling-2504.02402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evmic-event-based-non-contact-sound-recovery-from-effective-spatial-temporal-modeling-2504.02402"/></url>
<url><loc>https://scifaro.com/en/abs/f5r-tts-improving-flow-matching-based-text-to-speech-with-group-relative-policy-optimization-2504.02407</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f5r-tts-improving-flow-matching-based-text-to-speech-with-group-relative-policy-optimization-2504.02407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f5r-tts-improving-flow-matching-based-text-to-speech-with-group-relative-policy-optimization-2504.02407"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-music-generation-four-approaches-and-their-comparative-evaluation-2504.02586</loc><lastmod>2025-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-music-generation-four-approaches-and-their-comparative-evaluation-2504.02586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-music-generation-four-approaches-and-their-comparative-evaluation-2504.02586"/></url>
<url><loc>https://scifaro.com/en/abs/generating-diverse-audio-visual-360-soundscapes-for-sound-event-localization-and-detection-2504.02988</loc><lastmod>2025-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-diverse-audio-visual-360-soundscapes-for-sound-event-localization-and-detection-2504.02988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-diverse-audio-visual-360-soundscapes-for-sound-event-localization-and-detection-2504.02988"/></url>
<url><loc>https://scifaro.com/en/abs/rwkvtts-yet-another-tts-based-on-rwkv-7-2504.03289</loc><lastmod>2025-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rwkvtts-yet-another-tts-based-on-rwkv-7-2504.03289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rwkvtts-yet-another-tts-based-on-rwkv-7-2504.03289"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-gpu-based-implementation-for-noise-robust-sound-source-localization-2504.03373</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-gpu-based-implementation-for-noise-robust-sound-source-localization-2504.03373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-gpu-based-implementation-for-noise-robust-sound-source-localization-2504.03373"/></url>
<url><loc>https://scifaro.com/en/abs/determined-blind-source-separation-via-modeling-adjacent-frequency-band-correlations-in-speech-signals-2504.03998</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/determined-blind-source-separation-via-modeling-adjacent-frequency-band-correlations-in-speech-signals-2504.03998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/determined-blind-source-separation-via-modeling-adjacent-frequency-band-correlations-in-speech-signals-2504.03998"/></url>
<url><loc>https://scifaro.com/en/abs/formula-supervised-sound-event-detection-pre-training-without-real-data-2504.04428</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/formula-supervised-sound-event-detection-pre-training-without-real-data-2504.04428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/formula-supervised-sound-event-detection-pre-training-without-real-data-2504.04428"/></url>
<url><loc>https://scifaro.com/en/abs/loopgen-training-free-loopable-music-generation-2504.04466</loc><lastmod>2025-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loopgen-training-free-loopable-music-generation-2504.04466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loopgen-training-free-loopable-music-generation-2504.04466"/></url>
<url><loc>https://scifaro.com/en/abs/activation-patching-for-interpretable-steering-in-music-generation-2504.04479</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/activation-patching-for-interpretable-steering-in-music-generation-2504.04479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/activation-patching-for-interpretable-steering-in-music-generation-2504.04479"/></url>
<url><loc>https://scifaro.com/en/abs/solid-state-bus-comp-a-large-scale-and-diverse-dataset-for-dynamic-range-compressor-virtual-analog-modeling-2504.04589</loc><lastmod>2025-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/solid-state-bus-comp-a-large-scale-and-diverse-dataset-for-dynamic-range-compressor-virtual-analog-modeling-2504.04589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/solid-state-bus-comp-a-large-scale-and-diverse-dataset-for-dynamic-range-compressor-virtual-analog-modeling-2504.04589"/></url>
<url><loc>https://scifaro.com/en/abs/l3ac-towards-a-lightweight-and-lossless-audio-codec-2504.04949</loc><lastmod>2025-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l3ac-towards-a-lightweight-and-lossless-audio-codec-2504.04949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l3ac-towards-a-lightweight-and-lossless-audio-codec-2504.04949"/></url>
<url><loc>https://scifaro.com/en/abs/deconstructing-jazz-piano-style-using-machine-learning-2504.05009</loc><lastmod>2025-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deconstructing-jazz-piano-style-using-machine-learning-2504.05009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deconstructing-jazz-piano-style-using-machine-learning-2504.05009"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-label-potential-for-enhanced-multimodal-emotion-recognition-2504.05158</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-label-potential-for-enhanced-multimodal-emotion-recognition-2504.05158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-label-potential-for-enhanced-multimodal-emotion-recognition-2504.05158"/></url>
<url><loc>https://scifaro.com/en/abs/p2mark-plug-and-play-parameter-level-watermarking-for-neural-speech-generation-2504.05197</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/p2mark-plug-and-play-parameter-level-watermarking-for-neural-speech-generation-2504.05197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/p2mark-plug-and-play-parameter-level-watermarking-for-neural-speech-generation-2504.05197"/></url>
<url><loc>https://scifaro.com/en/abs/of-all-stripes-investigating-structure-informed-positional-encoding-for-efficient-music-generation-2504.05364</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/of-all-stripes-investigating-structure-informed-positional-encoding-for-efficient-music-generation-2504.05364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/of-all-stripes-investigating-structure-informed-positional-encoding-for-efficient-music-generation-2504.05364"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-local-interpretable-model-agnostic-explanations-for-speech-emotion-recognition-with-distribution-shift-2504.05368</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-local-interpretable-model-agnostic-explanations-for-speech-emotion-recognition-with-distribution-shift-2504.05368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-local-interpretable-model-agnostic-explanations-for-speech-emotion-recognition-with-distribution-shift-2504.05368"/></url>
<url><loc>https://scifaro.com/en/abs/soundvista-novel-view-ambient-sound-synthesis-via-visual-acoustic-binding-2504.05576</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundvista-novel-view-ambient-sound-synthesis-via-visual-acoustic-binding-2504.05576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundvista-novel-view-ambient-sound-synthesis-via-visual-acoustic-binding-2504.05576"/></url>
<url><loc>https://scifaro.com/en/abs/taro-timestep-adaptive-representation-alignment-with-onset-aware-conditioning-for-synchronized-video-to-audio-synthesis-2504.05684</loc><lastmod>2025-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taro-timestep-adaptive-representation-alignment-with-onset-aware-conditioning-for-synchronized-video-to-audio-synthesis-2504.05684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taro-timestep-adaptive-representation-alignment-with-onset-aware-conditioning-for-synchronized-video-to-audio-synthesis-2504.05684"/></url>
<url><loc>https://scifaro.com/en/abs/knn-svc-robust-zero-shot-singing-voice-conversion-with-additive-synthesis-and-concatenation-smoothness-optimization-2504.05686</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knn-svc-robust-zero-shot-singing-voice-conversion-with-additive-synthesis-and-concatenation-smoothness-optimization-2504.05686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knn-svc-robust-zero-shot-singing-voice-conversion-with-additive-synthesis-and-concatenation-smoothness-optimization-2504.05686"/></url>
<url><loc>https://scifaro.com/en/abs/stage-stemmed-accompaniment-generation-through-prefix-based-conditioning-2504.05690</loc><lastmod>2025-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stage-stemmed-accompaniment-generation-through-prefix-based-conditioning-2504.05690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stage-stemmed-accompaniment-generation-through-prefix-based-conditioning-2504.05690"/></url>
<url><loc>https://scifaro.com/en/abs/mass-spring-models-for-passive-keyword-spotting-a-springtronics-approach-2504.05802</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mass-spring-models-for-passive-keyword-spotting-a-springtronics-approach-2504.05802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mass-spring-models-for-passive-keyword-spotting-a-springtronics-approach-2504.05802"/></url>
<url><loc>https://scifaro.com/en/abs/avenet-disentangling-features-by-approximating-average-features-for-voice-conversion-2504.05833</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avenet-disentangling-features-by-approximating-average-features-for-voice-conversion-2504.05833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avenet-disentangling-features-by-approximating-average-features-for-voice-conversion-2504.05833"/></url>
<url><loc>https://scifaro.com/en/abs/r-eduire-le-bruit-gr-ace-a-la-r-ealit-e-augment-ee-sonore-auditory-concealer-2504.05847</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/r-eduire-le-bruit-gr-ace-a-la-r-ealit-e-augment-ee-sonore-auditory-concealer-2504.05847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/r-eduire-le-bruit-gr-ace-a-la-r-ealit-e-augment-ee-sonore-auditory-concealer-2504.05847"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-pitch-f0-detection-using-spectrogram-images-and-convolutional-neural-networks-2504.06165</loc><lastmod>2025-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-pitch-f0-detection-using-spectrogram-images-and-convolutional-neural-networks-2504.06165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-pitch-f0-detection-using-spectrogram-images-and-convolutional-neural-networks-2504.06165"/></url>
<url><loc>https://scifaro.com/en/abs/a-streamable-neural-audio-codec-with-residual-scalar-vector-quantization-for-real-time-communication-2504.06561</loc><lastmod>2025-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-streamable-neural-audio-codec-with-residual-scalar-vector-quantization-for-real-time-communication-2504.06561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-streamable-neural-audio-codec-with-residual-scalar-vector-quantization-for-real-time-communication-2504.06561"/></url>
<url><loc>https://scifaro.com/en/abs/detect-all-type-deepfake-audio-wavelet-prompt-tuning-for-enhanced-auditory-perception-2504.06753</loc><lastmod>2026-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detect-all-type-deepfake-audio-wavelet-prompt-tuning-for-enhanced-auditory-perception-2504.06753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detect-all-type-deepfake-audio-wavelet-prompt-tuning-for-enhanced-auditory-perception-2504.06753"/></url>
<url><loc>https://scifaro.com/en/abs/cafa-a-controllable-automatic-foley-artist-2504.06778</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cafa-a-controllable-automatic-foley-artist-2504.06778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cafa-a-controllable-automatic-foley-artist-2504.06778"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-intelligence-in-creating-representing-or-expressing-an-immersive-soundscape-2504.07153</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-intelligence-in-creating-representing-or-expressing-an-immersive-soundscape-2504.07153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-intelligence-in-creating-representing-or-expressing-an-immersive-soundscape-2504.07153"/></url>
<url><loc>https://scifaro.com/en/abs/quantum-inspired-genetic-algorithm-for-robust-source-separation-in-smart-city-acoustics-2504.07345</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantum-inspired-genetic-algorithm-for-robust-source-separation-in-smart-city-acoustics-2504.07345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantum-inspired-genetic-algorithm-for-robust-source-separation-in-smart-city-acoustics-2504.07345"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generalizability-to-tone-and-content-variations-in-the-transcription-of-amplifier-rendered-electric-guitar-audio-2504.07406</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generalizability-to-tone-and-content-variations-in-the-transcription-of-amplifier-rendered-electric-guitar-audio-2504.07406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generalizability-to-tone-and-content-variations-in-the-transcription-of-amplifier-rendered-electric-guitar-audio-2504.07406"/></url>
<url><loc>https://scifaro.com/en/abs/slimspeech-lightweight-and-efficient-text-to-speech-with-slim-rectified-flow-2504.07776</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slimspeech-lightweight-and-efficient-text-to-speech-with-slim-rectified-flow-2504.07776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slimspeech-lightweight-and-efficient-text-to-speech-with-slim-rectified-flow-2504.07776"/></url>
<url><loc>https://scifaro.com/en/abs/empowering-global-voices-a-data-efficient-phoneme-tone-adaptive-approach-to-high-fidelity-speech-synthesis-2504.07858</loc><lastmod>2025-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empowering-global-voices-a-data-efficient-phoneme-tone-adaptive-approach-to-high-fidelity-speech-synthesis-2504.07858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empowering-global-voices-a-data-efficient-phoneme-tone-adaptive-approach-to-high-fidelity-speech-synthesis-2504.07858"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-multilingual-text-to-speech-generation-with-language-aware-style-adaptation-2504.08274</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-multilingual-text-to-speech-generation-with-language-aware-style-adaptation-2504.08274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-multilingual-text-to-speech-generation-with-language-aware-style-adaptation-2504.08274"/></url>
<url><loc>https://scifaro.com/en/abs/location-oriented-sound-event-localization-and-detection-with-spatial-mapping-and-regression-localization-2504.08365</loc><lastmod>2026-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/location-oriented-sound-event-localization-and-detection-with-spatial-mapping-and-regression-localization-2504.08365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/location-oriented-sound-event-localization-and-detection-with-spatial-mapping-and-regression-localization-2504.08365"/></url>
<url><loc>https://scifaro.com/en/abs/passive-underwater-acoustic-signal-separation-based-on-feature-decoupling-dual-path-network-2504.08371</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/passive-underwater-acoustic-signal-separation-based-on-feature-decoupling-dual-path-network-2504.08371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/passive-underwater-acoustic-signal-separation-based-on-feature-decoupling-dual-path-network-2504.08371"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-design-of-diffusion-based-neural-speech-codecs-2504.08470</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-design-of-diffusion-based-neural-speech-codecs-2504.08470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-design-of-diffusion-based-neural-speech-codecs-2504.08470"/></url>
<url><loc>https://scifaro.com/en/abs/bowelrcnn-region-based-convolutional-neural-network-system-for-bowel-sound-auscultation-2504.08659</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bowelrcnn-region-based-convolutional-neural-network-system-for-bowel-sound-auscultation-2504.08659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bowelrcnn-region-based-convolutional-neural-network-system-for-bowel-sound-auscultation-2504.08659"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-audio-processing-with-large-language-model-on-wearable-devices-2504.08907</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-audio-processing-with-large-language-model-on-wearable-devices-2504.08907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-audio-processing-with-large-language-model-on-wearable-devices-2504.08907"/></url>
<url><loc>https://scifaro.com/en/abs/generation-of-musical-timbres-using-a-text-guided-diffusion-model-2504.09219</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generation-of-musical-timbres-using-a-text-guided-diffusion-model-2504.09219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generation-of-musical-timbres-using-a-text-guided-diffusion-model-2504.09219"/></url>
<url><loc>https://scifaro.com/en/abs/amnet-an-acoustic-model-network-for-enhanced-mandarin-speech-synthesis-2504.09225</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amnet-an-acoustic-model-network-for-enhanced-mandarin-speech-synthesis-2504.09225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amnet-an-acoustic-model-network-for-enhanced-mandarin-speech-synthesis-2504.09225"/></url>
<url><loc>https://scifaro.com/en/abs/fssuavl-a-discriminative-framework-using-vision-models-for-federated-self-supervised-audio-and-image-understanding-2504.09516</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fssuavl-a-discriminative-framework-using-vision-models-for-federated-self-supervised-audio-and-image-understanding-2504.09516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fssuavl-a-discriminative-framework-using-vision-models-for-federated-self-supervised-audio-and-image-understanding-2504.09516"/></url>
<url><loc>https://scifaro.com/en/abs/safespeech-robust-and-universal-voice-protection-against-malicious-speech-synthesis-2504.09839</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/safespeech-robust-and-universal-voice-protection-against-malicious-speech-synthesis-2504.09839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/safespeech-robust-and-universal-voice-protection-against-malicious-speech-synthesis-2504.09839"/></url>
<url><loc>https://scifaro.com/en/abs/separate-to-collaborate-dual-stream-diffusion-model-for-coordinated-piano-hand-motion-synthesis-2504.09885</loc><lastmod>2025-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-to-collaborate-dual-stream-diffusion-model-for-coordinated-piano-hand-motion-synthesis-2504.09885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-to-collaborate-dual-stream-diffusion-model-for-coordinated-piano-hand-motion-synthesis-2504.09885"/></url>
<url><loc>https://scifaro.com/en/abs/autostyle-tts-retrieval-augmented-generation-based-automatic-style-matching-text-to-speech-synthesis-2504.10309</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autostyle-tts-retrieval-augmented-generation-based-automatic-style-matching-text-to-speech-synthesis-2504.10309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autostyle-tts-retrieval-augmented-generation-based-automatic-style-matching-text-to-speech-synthesis-2504.10309"/></url>
<url><loc>https://scifaro.com/en/abs/almtokenizer-a-low-bitrate-and-semantic-rich-audio-codec-tokenizer-for-audio-language-modeling-2504.10344</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/almtokenizer-a-low-bitrate-and-semantic-rich-audio-codec-tokenizer-for-audio-language-modeling-2504.10344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/almtokenizer-a-low-bitrate-and-semantic-rich-audio-codec-tokenizer-for-audio-language-modeling-2504.10344"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-watermarks-are-shallow-limitations-of-post-hoc-watermarking-techniques-for-speech-2504.10782</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-watermarks-are-shallow-limitations-of-post-hoc-watermarking-techniques-for-speech-2504.10782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-watermarks-are-shallow-limitations-of-post-hoc-watermarking-techniques-for-speech-2504.10782"/></url>
<url><loc>https://scifaro.com/en/abs/sonicsieve-bringing-directional-speech-extraction-to-smartphones-using-acoustic-microstructures-2504.10793</loc><lastmod>2026-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonicsieve-bringing-directional-speech-extraction-to-smartphones-using-acoustic-microstructures-2504.10793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonicsieve-bringing-directional-speech-extraction-to-smartphones-using-acoustic-microstructures-2504.10793"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-audio-deepfake-detection-using-frame-level-latent-information-entropy-2504.10819</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-audio-deepfake-detection-using-frame-level-latent-information-entropy-2504.10819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-audio-deepfake-detection-using-frame-level-latent-information-entropy-2504.10819"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-rock-music-classification-2504.10821</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-rock-music-classification-2504.10821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-rock-music-classification-2504.10821"/></url>
<url><loc>https://scifaro.com/en/abs/steermusic-enhanced-musical-consistency-for-zero-shot-text-guided-and-personalized-music-editing-2504.10826</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steermusic-enhanced-musical-consistency-for-zero-shot-text-guided-and-personalized-music-editing-2504.10826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steermusic-enhanced-musical-consistency-for-zero-shot-text-guided-and-personalized-music-editing-2504.10826"/></url>
<url><loc>https://scifaro.com/en/abs/dopamine-audiobook-a-training-free-mllm-agent-for-emotional-and-immersive-audiobook-generation-2504.11002</loc><lastmod>2025-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dopamine-audiobook-a-training-free-mllm-agent-for-emotional-and-immersive-audiobook-generation-2504.11002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dopamine-audiobook-a-training-free-mllm-agent-for-emotional-and-immersive-audiobook-generation-2504.11002"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-with-diverse-intonation-using-conditional-variational-auto-encoder-2504.12005</loc><lastmod>2025-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-with-diverse-intonation-using-conditional-variational-auto-encoder-2504.12005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-with-diverse-intonation-using-conditional-variational-auto-encoder-2504.12005"/></url>
<url><loc>https://scifaro.com/en/abs/edge-intelligence-for-wildlife-conservation-real-time-hornbill-call-classification-using-tinyml-2504.12272</loc><lastmod>2025-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edge-intelligence-for-wildlife-conservation-real-time-hornbill-call-classification-using-tinyml-2504.12272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edge-intelligence-for-wildlife-conservation-real-time-hornbill-call-classification-using-tinyml-2504.12272"/></url>
<url><loc>https://scifaro.com/en/abs/dysarthria-normalization-via-local-lie-group-transformations-for-robust-asr-2504.12279</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dysarthria-normalization-via-local-lie-group-transformations-for-robust-asr-2504.12279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dysarthria-normalization-via-local-lie-group-transformations-for-robust-asr-2504.12279"/></url>
<url><loc>https://scifaro.com/en/abs/an-accurate-measurement-of-parametric-array-using-a-spurious-sound-filter-topologically-equivalent-to-a-half-wavelength-resonator-2504.12398</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-accurate-measurement-of-parametric-array-using-a-spurious-sound-filter-topologically-equivalent-to-a-half-wavelength-resonator-2504.12398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-accurate-measurement-of-parametric-array-using-a-spurious-sound-filter-topologically-equivalent-to-a-half-wavelength-resonator-2504.12398"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-task-learning-balanced-attention-convolutional-neural-network-model-for-few-shot-underwater-acoustic-target-recognition-2504.13102</loc><lastmod>2026-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-task-learning-balanced-attention-convolutional-neural-network-model-for-few-shot-underwater-acoustic-target-recognition-2504.13102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-task-learning-balanced-attention-convolutional-neural-network-model-for-few-shot-underwater-acoustic-target-recognition-2504.13102"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-of-speech-data-driven-approaches-challenges-applications-and-future-scope-2504.13308</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-of-speech-data-driven-approaches-challenges-applications-and-future-scope-2504.13308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-of-speech-data-driven-approaches-challenges-applications-and-future-scope-2504.13308"/></url>
<url><loc>https://scifaro.com/en/abs/musflow-multimodal-music-generation-via-conditional-flow-matching-2504.13535</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musflow-multimodal-music-generation-via-conditional-flow-matching-2504.13535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musflow-multimodal-music-generation-via-conditional-flow-matching-2504.13535"/></url>
<url><loc>https://scifaro.com/en/abs/collective-learning-mechanism-based-optimal-transport-generative-adversarial-network-for-non-parallel-voice-conversion-2504.13791</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collective-learning-mechanism-based-optimal-transport-generative-adversarial-network-for-non-parallel-voice-conversion-2504.13791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collective-learning-mechanism-based-optimal-transport-generative-adversarial-network-for-non-parallel-voice-conversion-2504.13791"/></url>
<url><loc>https://scifaro.com/en/abs/transformation-of-audio-embeddings-into-interpretable-concept-based-representations-2504.14076</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformation-of-audio-embeddings-into-interpretable-concept-based-representations-2504.14076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformation-of-audio-embeddings-into-interpretable-concept-based-representations-2504.14076"/></url>
<url><loc>https://scifaro.com/en/abs/diffvox-a-differentiable-model-for-capturing-and-analysing-vocal-effects-distributions-2504.14735</loc><lastmod>2025-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffvox-a-differentiable-model-for-capturing-and-analysing-vocal-effects-distributions-2504.14735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffvox-a-differentiable-model-for-capturing-and-analysing-vocal-effects-distributions-2504.14735"/></url>
<url><loc>https://scifaro.com/en/abs/aria-midi-a-dataset-of-piano-midi-files-for-symbolic-music-modeling-2504.15071</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aria-midi-a-dataset-of-piano-midi-files-for-symbolic-music-modeling-2504.15071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aria-midi-a-dataset-of-piano-midi-files-for-symbolic-music-modeling-2504.15071"/></url>
<url><loc>https://scifaro.com/en/abs/dragon-distributional-rewards-optimize-diffusion-generative-models-2504.15217</loc><lastmod>2025-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dragon-distributional-rewards-optimize-diffusion-generative-models-2504.15217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dragon-distributional-rewards-optimize-diffusion-generative-models-2504.15217"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-source-speaker-leakage-in-one-to-one-voice-conversion-2504.15822</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-source-speaker-leakage-in-one-to-one-voice-conversion-2504.15822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-source-speaker-leakage-in-one-to-one-voice-conversion-2504.15822"/></url>
<url><loc>https://scifaro.com/en/abs/tinyml-for-speech-recognition-2504.16213</loc><lastmod>2025-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tinyml-for-speech-recognition-2504.16213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tinyml-for-speech-recognition-2504.16213"/></url>
<url><loc>https://scifaro.com/en/abs/smart-tuning-a-symbolic-music-generation-system-with-an-audio-domain-aesthetic-reward-2504.16839</loc><lastmod>2025-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smart-tuning-a-symbolic-music-generation-system-with-an-audio-domain-aesthetic-reward-2504.16839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smart-tuning-a-symbolic-music-generation-system-with-an-audio-domain-aesthetic-reward-2504.16839"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-logmel-audio-neural-networks-for-respiratory-sound-classification-2504.17156</loc><lastmod>2025-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-logmel-audio-neural-networks-for-respiratory-sound-classification-2504.17156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-logmel-audio-neural-networks-for-respiratory-sound-classification-2504.17156"/></url>
<url><loc>https://scifaro.com/en/abs/a-machine-learning-approach-for-denoising-and-upsampling-hrtfs-2504.17586</loc><lastmod>2026-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-machine-learning-approach-for-denoising-and-upsampling-hrtfs-2504.17586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-machine-learning-approach-for-denoising-and-upsampling-hrtfs-2504.17586"/></url>
<url><loc>https://scifaro.com/en/abs/unleashing-the-power-of-natural-audio-featuring-multiple-sound-sources-2504.17782</loc><lastmod>2025-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unleashing-the-power-of-natural-audio-featuring-multiple-sound-sources-2504.17782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unleashing-the-power-of-natural-audio-featuring-multiple-sound-sources-2504.17782"/></url>
<url><loc>https://scifaro.com/en/abs/stnet-prediction-of-underwater-sound-speed-profiles-with-an-advanced-semi-transformer-neural-network-2504.17912</loc><lastmod>2025-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stnet-prediction-of-underwater-sound-speed-profiles-with-an-advanced-semi-transformer-neural-network-2504.17912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stnet-prediction-of-underwater-sound-speed-profiles-with-an-advanced-semi-transformer-neural-network-2504.17912"/></url>
<url><loc>https://scifaro.com/en/abs/tracking-articulatory-dynamics-in-speech-with-a-fixed-weight-bilstm-cnn-architecture-2504.18099</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tracking-articulatory-dynamics-in-speech-with-a-fixed-weight-bilstm-cnn-architecture-2504.18099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tracking-articulatory-dynamics-in-speech-with-a-fixed-weight-bilstm-cnn-architecture-2504.18099"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-for-low-resource-languages-through-wav2vec-fine-tuning-2504.18582</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-for-low-resource-languages-through-wav2vec-fine-tuning-2504.18582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-for-low-resource-languages-through-wav2vec-fine-tuning-2504.18582"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-retrieval-in-the-wild-challenges-effectiveness-and-robustness-2504.18950</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-retrieval-in-the-wild-challenges-effectiveness-and-robustness-2504.18950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-retrieval-in-the-wild-challenges-effectiveness-and-robustness-2504.18950"/></url>
<url><loc>https://scifaro.com/en/abs/improving-pretrained-yamnet-for-enhanced-speech-command-detection-via-transfer-learning-2504.19030</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-pretrained-yamnet-for-enhanced-speech-command-detection-via-transfer-learning-2504.19030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-pretrained-yamnet-for-enhanced-speech-command-detection-via-transfer-learning-2504.19030"/></url>
<url><loc>https://scifaro.com/en/abs/muyan-tts-a-trainable-text-to-speech-model-optimized-for-podcast-scenarios-with-a-50k-budget-2504.19146</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muyan-tts-a-trainable-text-to-speech-model-optimized-for-podcast-scenarios-with-a-50k-budget-2504.19146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muyan-tts-a-trainable-text-to-speech-model-optimized-for-podcast-scenarios-with-a-50k-budget-2504.19146"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-network-based-voice-conversion-techniques-challenges-and-recent-advancements-2504.19197</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-network-based-voice-conversion-techniques-challenges-and-recent-advancements-2504.19197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-network-based-voice-conversion-techniques-challenges-and-recent-advancements-2504.19197"/></url>
<url><loc>https://scifaro.com/en/abs/pediatric-asthma-detection-with-googles-hear-model-an-ai-driven-respiratory-sound-classifier-2504.20124</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pediatric-asthma-detection-with-googles-hear-model-an-ai-driven-respiratory-sound-classifier-2504.20124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pediatric-asthma-detection-with-googles-hear-model-an-ai-driven-respiratory-sound-classifier-2504.20124"/></url>
<url><loc>https://scifaro.com/en/abs/apg-mos-auditory-perception-guided-mos-predictor-for-synthetic-speech-2504.20447</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apg-mos-auditory-perception-guided-mos-predictor-for-synthetic-speech-2504.20447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apg-mos-auditory-perception-guided-mos-predictor-for-synthetic-speech-2504.20447"/></url>
<url><loc>https://scifaro.com/en/abs/diffusionrir-room-impulse-response-interpolation-using-diffusion-models-2504.20625</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusionrir-room-impulse-response-interpolation-using-diffusion-models-2504.20625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusionrir-room-impulse-response-interpolation-using-diffusion-models-2504.20625"/></url>
<url><loc>https://scifaro.com/en/abs/ecosoundset-a-finely-annotated-dataset-for-the-automated-acoustic-identification-of-orthoptera-and-cicadidae-in-north-central-and-temperate-western-europe-2504.20776</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecosoundset-a-finely-annotated-dataset-for-the-automated-acoustic-identification-of-orthoptera-and-cicadidae-in-north-central-and-temperate-western-europe-2504.20776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecosoundset-a-finely-annotated-dataset-for-the-automated-acoustic-identification-of-orthoptera-and-cicadidae-in-north-central-and-temperate-western-europe-2504.20776"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-non-core-language-instruction-following-in-speech-llms-via-semi-implicit-cross-lingual-cot-reasoning-2504.20835</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-non-core-language-instruction-following-in-speech-llms-via-semi-implicit-cross-lingual-cot-reasoning-2504.20835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-non-core-language-instruction-following-in-speech-llms-via-semi-implicit-cross-lingual-cot-reasoning-2504.20835"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-audio-deepfake-detection-from-raw-waveforms-a-rawnet-based-approach-with-cross-dataset-evaluation-2504.20923</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-audio-deepfake-detection-from-raw-waveforms-a-rawnet-based-approach-with-cross-dataset-evaluation-2504.20923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-audio-deepfake-detection-from-raw-waveforms-a-rawnet-based-approach-with-cross-dataset-evaluation-2504.20923"/></url>
<url><loc>https://scifaro.com/en/abs/design-analysis-and-experimental-validation-of-a-stepped-plate-parametric-array-loudspeaker-2504.21171</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-analysis-and-experimental-validation-of-a-stepped-plate-parametric-array-loudspeaker-2504.21171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-analysis-and-experimental-validation-of-a-stepped-plate-parametric-array-loudspeaker-2504.21171"/></url>
<url><loc>https://scifaro.com/en/abs/dgfnet-end-to-end-audio-visual-source-separation-based-on-dynamic-gating-fusion-2504.21366</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dgfnet-end-to-end-audio-visual-source-separation-based-on-dynamic-gating-fusion-2504.21366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dgfnet-end-to-end-audio-visual-source-separation-based-on-dynamic-gating-fusion-2504.21366"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-cultural-and-digital-divides-a-low-latency-jacktrip-framework-for-equitable-music-education-in-the-global-south-2505.00550</loc><lastmod>2025-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-cultural-and-digital-divides-a-low-latency-jacktrip-framework-for-equitable-music-education-in-the-global-south-2505.00550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-cultural-and-digital-divides-a-low-latency-jacktrip-framework-for-equitable-music-education-in-the-global-south-2505.00550"/></url>
<url><loc>https://scifaro.com/en/abs/voice-cloning-comprehensive-survey-2505.00579</loc><lastmod>2025-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-cloning-comprehensive-survey-2505.00579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-cloning-comprehensive-survey-2505.00579"/></url>
<url><loc>https://scifaro.com/en/abs/gvpt-a-software-for-guided-visual-pitch-tracking-2505.00750</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gvpt-a-software-for-guided-visual-pitch-tracking-2505.00750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gvpt-a-software-for-guided-visual-pitch-tracking-2505.00750"/></url>
<url><loc>https://scifaro.com/en/abs/smsat-a-multimodal-acoustic-dataset-and-deep-contrastive-learning-framework-for-affective-and-physiological-modeling-of-spiritual-meditation-2505.00839</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smsat-a-multimodal-acoustic-dataset-and-deep-contrastive-learning-framework-for-affective-and-physiological-modeling-of-spiritual-meditation-2505.00839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smsat-a-multimodal-acoustic-dataset-and-deep-contrastive-learning-framework-for-affective-and-physiological-modeling-of-spiritual-meditation-2505.00839"/></url>
<url><loc>https://scifaro.com/en/abs/binamix-a-python-library-for-generating-binaural-audio-datasets-2505.01369</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binamix-a-python-library-for-generating-binaural-audio-datasets-2505.01369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binamix-a-python-library-for-generating-binaural-audio-datasets-2505.01369"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-audio-temporal-forgery-localization-via-progressive-audio-language-co-learning-network-2505.01880</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-audio-temporal-forgery-localization-via-progressive-audio-language-co-learning-network-2505.01880"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-audio-temporal-forgery-localization-via-progressive-audio-language-co-learning-network-2505.01880"/></url>
<url><loc>https://scifaro.com/en/abs/maskclip-detachable-clip-on-piezoelectric-sensing-of-mask-surface-vibrations-for-real-time-noise-robust-speech-input-2505.02180</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maskclip-detachable-clip-on-piezoelectric-sensing-of-mask-surface-vibrations-for-real-time-noise-robust-speech-input-2505.02180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maskclip-detachable-clip-on-piezoelectric-sensing-of-mask-surface-vibrations-for-real-time-noise-robust-speech-input-2505.02180"/></url>
<url><loc>https://scifaro.com/en/abs/cogenav-versatile-audio-visual-representation-learning-via-contrastive-generative-synchronization-2505.03186</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cogenav-versatile-audio-visual-representation-learning-via-contrastive-generative-synchronization-2505.03186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cogenav-versatile-audio-visual-representation-learning-via-contrastive-generative-synchronization-2505.03186"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-audio-synchronous-steganography-detection-and-distributed-guide-inference-model-based-on-sliding-spectral-features-and-intelligent-inference-drive-2505.03193</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-audio-synchronous-steganography-detection-and-distributed-guide-inference-model-based-on-sliding-spectral-features-and-intelligent-inference-drive-2505.03193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-audio-synchronous-steganography-detection-and-distributed-guide-inference-model-based-on-sliding-spectral-features-and-intelligent-inference-drive-2505.03193"/></url>
<url><loc>https://scifaro.com/en/abs/mgff-tdnn-a-multi-granularity-feature-fusion-tdnn-model-with-depth-wise-separable-module-for-speaker-verification-2505.03228</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mgff-tdnn-a-multi-granularity-feature-fusion-tdnn-model-with-depth-wise-separable-module-for-speaker-verification-2505.03228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mgff-tdnn-a-multi-granularity-feature-fusion-tdnn-model-with-depth-wise-separable-module-for-speaker-verification-2505.03228"/></url>
<url><loc>https://scifaro.com/en/abs/sonicrag-high-fidelity-sound-effects-synthesis-based-on-retrival-augmented-generation-2505.03244</loc><lastmod>2025-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonicrag-high-fidelity-sound-effects-synthesis-based-on-retrival-augmented-generation-2505.03244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonicrag-high-fidelity-sound-effects-synthesis-based-on-retrival-augmented-generation-2505.03244"/></url>
<url><loc>https://scifaro.com/en/abs/sepalm-audio-language-models-are-error-correctors-for-robust-speech-separation-2505.03273</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sepalm-audio-language-models-are-error-correctors-for-robust-speech-separation-2505.03273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sepalm-audio-language-models-are-error-correctors-for-robust-speech-separation-2505.03273"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-diffusion-model-with-learnable-wavelet-for-controllable-symbolic-music-generation-2505.03314</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-diffusion-model-with-learnable-wavelet-for-controllable-symbolic-music-generation-2505.03314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-diffusion-model-with-learnable-wavelet-for-controllable-symbolic-music-generation-2505.03314"/></url>
<url><loc>https://scifaro.com/en/abs/the-inverse-drum-machine-source-separation-through-joint-transcription-and-analysis-by-synthesis-2505.03337</loc><lastmod>2025-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-inverse-drum-machine-source-separation-through-joint-transcription-and-analysis-by-synthesis-2505.03337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-inverse-drum-machine-source-separation-through-joint-transcription-and-analysis-by-synthesis-2505.03337"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-speech-denoising-by-latent-representation-alignment-with-cosine-distance-2505.03442</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-speech-denoising-by-latent-representation-alignment-with-cosine-distance-2505.03442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-speech-denoising-by-latent-representation-alignment-with-cosine-distance-2505.03442"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-zero-shot-text-to-speech-intelligibility-across-diverse-domains-via-preference-alignment-2505.04113</loc><lastmod>2025-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-zero-shot-text-to-speech-intelligibility-across-diverse-domains-via-preference-alignment-2505.04113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-zero-shot-text-to-speech-intelligibility-across-diverse-domains-via-preference-alignment-2505.04113"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-music-transcription-using-convolutional-neural-networks-and-constant-q-transform-2505.04451</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-music-transcription-using-convolutional-neural-networks-and-constant-q-transform-2505.04451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-music-transcription-using-convolutional-neural-networks-and-constant-q-transform-2505.04451"/></url>
<url><loc>https://scifaro.com/en/abs/miipher-2-a-universal-speech-restoration-model-for-million-hour-scale-data-restoration-2505.04457</loc><lastmod>2025-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/miipher-2-a-universal-speech-restoration-model-for-million-hour-scale-data-restoration-2505.04457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/miipher-2-a-universal-speech-restoration-model-for-million-hour-scale-data-restoration-2505.04457"/></url>
<url><loc>https://scifaro.com/en/abs/score-distillation-sampling-for-audio-source-separation-synthesis-and-beyond-2505.04621</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-distillation-sampling-for-audio-source-separation-synthesis-and-beyond-2505.04621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-distillation-sampling-for-audio-source-separation-synthesis-and-beyond-2505.04621"/></url>
<url><loc>https://scifaro.com/en/abs/data-standards-in-audiology-a-mixed-methods-exploration-of-community-perspectives-and-implementation-considerations-2505.04728</loc><lastmod>2026-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-standards-in-audiology-a-mixed-methods-exploration-of-community-perspectives-and-implementation-considerations-2505.04728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-standards-in-audiology-a-mixed-methods-exploration-of-community-perspectives-and-implementation-considerations-2505.04728"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-agent-ai-framework-for-immersive-audiobook-production-through-spatial-audio-and-neural-narration-2505.04885</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-agent-ai-framework-for-immersive-audiobook-production-through-spatial-audio-and-neural-narration-2505.04885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-agent-ai-framework-for-immersive-audiobook-production-through-spatial-audio-and-neural-narration-2505.04885"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-infer-repeat-structures-in-midi-performances-2505.05055</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-infer-repeat-structures-in-midi-performances-2505.05055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-infer-repeat-structures-in-midi-performances-2505.05055"/></url>
<url><loc>https://scifaro.com/en/abs/reverbmiipher-generative-speech-restoration-meets-reverberation-characteristics-controllability-2505.05077</loc><lastmod>2025-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverbmiipher-generative-speech-restoration-meets-reverberation-characteristics-controllability-2505.05077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverbmiipher-generative-speech-restoration-meets-reverberation-characteristics-controllability-2505.05077"/></url>
<url><loc>https://scifaro.com/en/abs/pairing-real-time-piano-transcription-with-symbol-level-tracking-for-precise-and-robust-score-following-2505.05078</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pairing-real-time-piano-transcription-with-symbol-level-tracking-for-precise-and-robust-score-following-2505.05078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pairing-real-time-piano-transcription-with-symbol-level-tracking-for-precise-and-robust-score-following-2505.05078"/></url>
<url><loc>https://scifaro.com/en/abs/flam-frame-wise-language-audio-modeling-2505.05335</loc><lastmod>2025-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flam-frame-wise-language-audio-modeling-2505.05335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flam-frame-wise-language-audio-modeling-2505.05335"/></url>
<url><loc>https://scifaro.com/en/abs/toward-a-sparse-and-interpretable-audio-codec-2505.05654</loc><lastmod>2025-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-a-sparse-and-interpretable-audio-codec-2505.05654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-a-sparse-and-interpretable-audio-codec-2505.05654"/></url>
<url><loc>https://scifaro.com/en/abs/fast-differentiable-modal-simulation-of-non-linear-strings-membranes-and-plates-2505.05940</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-differentiable-modal-simulation-of-non-linear-strings-membranes-and-plates-2505.05940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-differentiable-modal-simulation-of-non-linear-strings-membranes-and-plates-2505.05940"/></url>
<url><loc>https://scifaro.com/en/abs/learning-music-audio-representations-with-limited-data-2505.06042</loc><lastmod>2025-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-music-audio-representations-with-limited-data-2505.06042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-music-audio-representations-with-limited-data-2505.06042"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-identity-a-generalizable-approach-for-deepfake-audio-detection-2505.06766</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-identity-a-generalizable-approach-for-deepfake-audio-detection-2505.06766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-identity-a-generalizable-approach-for-deepfake-audio-detection-2505.06766"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-ears-and-eyes-analyzing-audio-and-visual-large-language-models-to-humans-in-visible-sound-recognition-and-reducing-their-sensory-gap-via-cross-modal-distillation-2505.06803</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-ears-and-eyes-analyzing-audio-and-visual-large-language-models-to-humans-in-visible-sound-recognition-and-reducing-their-sensory-gap-via-cross-modal-distillation-2505.06803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-ears-and-eyes-analyzing-audio-and-visual-large-language-models-to-humans-in-visible-sound-recognition-and-reducing-their-sensory-gap-via-cross-modal-distillation-2505.06803"/></url>
<url><loc>https://scifaro.com/en/abs/multi-band-frequency-reconstruction-for-neural-psychoacoustic-coding-2505.07235</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-band-frequency-reconstruction-for-neural-psychoacoustic-coding-2505.07235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-band-frequency-reconstruction-for-neural-psychoacoustic-coding-2505.07235"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-music-track-popularity-by-convolutional-neural-networks-on-spotify-features-and-spectrogram-of-audio-waveform-2505.07280</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-music-track-popularity-by-convolutional-neural-networks-on-spotify-features-and-spectrogram-of-audio-waveform-2505.07280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-music-track-popularity-by-convolutional-neural-networks-on-spotify-features-and-spectrogram-of-audio-waveform-2505.07280"/></url>
<url><loc>https://scifaro.com/en/abs/multi-domain-audio-question-answering-benchmark-toward-acoustic-content-reasoning-2505.07365</loc><lastmod>2026-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-domain-audio-question-answering-benchmark-toward-acoustic-content-reasoning-2505.07365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-domain-audio-question-answering-benchmark-toward-acoustic-content-reasoning-2505.07365"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-end-to-end-text-to-speech-synthesis-for-low-resource-on-device-applications-2505.07701</loc><lastmod>2025-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-end-to-end-text-to-speech-synthesis-for-low-resource-on-device-applications-2505.07701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-end-to-end-text-to-speech-synthesis-for-low-resource-on-device-applications-2505.07701"/></url>
<url><loc>https://scifaro.com/en/abs/isac-an-invertible-and-stable-auditory-filter-bank-with-customizable-kernels-for-ml-integration-2505.07709</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/isac-an-invertible-and-stable-auditory-filter-bank-with-customizable-kernels-for-ml-integration-2505.07709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/isac-an-invertible-and-stable-auditory-filter-bank-with-customizable-kernels-for-ml-integration-2505.07709"/></url>
<url><loc>https://scifaro.com/en/abs/fast-text-to-audio-generation-with-adversarial-post-training-2505.08175</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-text-to-audio-generation-with-adversarial-post-training-2505.08175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-text-to-audio-generation-with-adversarial-post-training-2505.08175"/></url>
<url><loc>https://scifaro.com/en/abs/not-that-groove-zero-shot-symbolic-music-editing-2505.08203</loc><lastmod>2026-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/not-that-groove-zero-shot-symbolic-music-editing-2505.08203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/not-that-groove-zero-shot-symbolic-music-editing-2505.08203"/></url>
<url><loc>https://scifaro.com/en/abs/a-mamba-based-network-for-semi-supervised-singing-melody-extraction-using-confidence-binary-regularization-2505.08681</loc><lastmod>2025-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mamba-based-network-for-semi-supervised-singing-melody-extraction-using-confidence-binary-regularization-2505.08681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mamba-based-network-for-semi-supervised-singing-melody-extraction-using-confidence-binary-regularization-2505.08681"/></url>
<url><loc>https://scifaro.com/en/abs/dpn-gan-inducing-periodic-activations-in-generative-adversarial-networks-for-high-fidelity-audio-synthesis-2505.09091</loc><lastmod>2025-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpn-gan-inducing-periodic-activations-in-generative-adversarial-networks-for-high-fidelity-audio-synthesis-2505.09091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpn-gan-inducing-periodic-activations-in-generative-adversarial-networks-for-high-fidelity-audio-synthesis-2505.09091"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-noise-resilient-keyword-spotting-using-one-shot-learning-2505.09304</loc><lastmod>2025-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-noise-resilient-keyword-spotting-using-one-shot-learning-2505.09304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-noise-resilient-keyword-spotting-using-one-shot-learning-2505.09304"/></url>
<url><loc>https://scifaro.com/en/abs/singnet-towards-a-large-scale-diverse-and-in-the-wild-singing-voice-dataset-2505.09325</loc><lastmod>2025-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singnet-towards-a-large-scale-diverse-and-in-the-wild-singing-voice-dataset-2505.09325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singnet-towards-a-large-scale-diverse-and-in-the-wild-singing-voice-dataset-2505.09325"/></url>
<url><loc>https://scifaro.com/en/abs/the-voice-timbre-attribute-detection-2025-challenge-evaluation-plan-2505.09382</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voice-timbre-attribute-detection-2025-challenge-evaluation-plan-2505.09382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voice-timbre-attribute-detection-2025-challenge-evaluation-plan-2505.09382"/></url>
<url><loc>https://scifaro.com/en/abs/specwav-attack-leveraging-spectrogram-resizing-and-wav2vec-2-0-for-attacking-anonymized-speech-2505.09616</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specwav-attack-leveraging-spectrogram-resizing-and-wav2vec-2-0-for-attacking-anonymized-speech-2505.09616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specwav-attack-leveraging-spectrogram-resizing-and-wav2vec-2-0-for-attacking-anonymized-speech-2505.09616"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-musical-deepfakes-2505.09633</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-musical-deepfakes-2505.09633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-musical-deepfakes-2505.09633"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-voice-timbre-attribute-detection-2505.09661</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-voice-timbre-attribute-detection-2505.09661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-voice-timbre-attribute-detection-2505.09661"/></url>
<url><loc>https://scifaro.com/en/abs/theoretical-model-of-acoustic-power-transfer-through-solids-2505.09784</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theoretical-model-of-acoustic-power-transfer-through-solids-2505.09784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theoretical-model-of-acoustic-power-transfer-through-solids-2505.09784"/></url>
<url><loc>https://scifaro.com/en/abs/lav-audio-driven-dynamic-visual-generation-with-neural-compression-and-stylegan2-2505.10101</loc><lastmod>2026-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lav-audio-driven-dynamic-visual-generation-with-neural-compression-and-stylegan2-2505.10101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lav-audio-driven-dynamic-visual-generation-with-neural-compression-and-stylegan2-2505.10101"/></url>
<url><loc>https://scifaro.com/en/abs/learning-nonlinear-dynamics-in-physical-modelling-synthesis-using-neural-ordinary-differential-equations-2505.10511</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-nonlinear-dynamics-in-physical-modelling-synthesis-using-neural-ordinary-differential-equations-2505.10511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-nonlinear-dynamics-in-physical-modelling-synthesis-using-neural-ordinary-differential-equations-2505.10511"/></url>
<url><loc>https://scifaro.com/en/abs/t2a-feedback-improving-basic-capabilities-of-text-to-audio-generation-via-fine-grained-ai-feedback-2505.10561</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t2a-feedback-improving-basic-capabilities-of-text-to-audio-generation-via-fine-grained-ai-feedback-2505.10561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t2a-feedback-improving-basic-capabilities-of-text-to-audio-generation-via-fine-grained-ai-feedback-2505.10561"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stage-speaker-diarization-for-noisy-classrooms-2505.10879</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stage-speaker-diarization-for-noisy-classrooms-2505.10879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stage-speaker-diarization-for-noisy-classrooms-2505.10879"/></url>
<url><loc>https://scifaro.com/en/abs/banglafake-constructing-and-evaluating-a-specialized-bengali-deepfake-audio-dataset-2505.10885</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/banglafake-constructing-and-evaluating-a-specialized-bengali-deepfake-audio-dataset-2505.10885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/banglafake-constructing-and-evaluating-a-specialized-bengali-deepfake-audio-dataset-2505.10885"/></url>
<url><loc>https://scifaro.com/en/abs/allm4add-unlocking-the-capabilities-of-audio-large-language-models-for-audio-deepfake-detection-2505.11079</loc><lastmod>2025-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/allm4add-unlocking-the-capabilities-of-audio-large-language-models-for-audio-deepfake-detection-2505.11079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/allm4add-unlocking-the-capabilities-of-audio-large-language-models-for-audio-deepfake-detection-2505.11079"/></url>
<url><loc>https://scifaro.com/en/abs/audio-turing-test-benchmarking-the-human-likeness-of-large-language-model-based-text-to-speech-systems-in-chinese-2505.11200</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-turing-test-benchmarking-the-human-likeness-of-large-language-model-based-text-to-speech-systems-in-chinese-2505.11200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-turing-test-benchmarking-the-human-likeness-of-large-language-model-based-text-to-speech-systems-in-chinese-2505.11200"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-sound-hearing-sight-uncovering-modality-bias-and-conflict-of-ai-models-in-sound-localization-2505.11217</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-sound-hearing-sight-uncovering-modality-bias-and-conflict-of-ai-models-in-sound-localization-2505.11217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-sound-hearing-sight-uncovering-modality-bias-and-conflict-of-ai-models-in-sound-localization-2505.11217"/></url>
<url><loc>https://scifaro.com/en/abs/improving-inference-time-optimisation-for-vocal-effects-style-transfer-with-a-gaussian-prior-2505.11315</loc><lastmod>2025-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-inference-time-optimisation-for-vocal-effects-style-transfer-with-a-gaussian-prior-2505.11315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-inference-time-optimisation-for-vocal-effects-style-transfer-with-a-gaussian-prior-2505.11315"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-approaches-to-vocal-register-classification-in-contemporary-male-pop-music-2505.11378</loc><lastmod>2025-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-approaches-to-vocal-register-classification-in-contemporary-male-pop-music-2505.11378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-approaches-to-vocal-register-classification-in-contemporary-male-pop-music-2505.11378"/></url>
<url><loc>https://scifaro.com/en/abs/asr-fairbench-measuring-and-benchmarking-equity-across-speech-recognition-systems-2505.11572</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-fairbench-measuring-and-benchmarking-equity-across-speech-recognition-systems-2505.11572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-fairbench-measuring-and-benchmarking-equity-across-speech-recognition-systems-2505.11572"/></url>
</urlset>