<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<url><loc>https://scifaro.com/en/abs/a-new-cosine-series-antialiasing-function-and-its-application-to-aliasing-free-glottal-source-models-for-speech-and-singing-synthesis-1702.06724</loc><lastmod>2018-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-cosine-series-antialiasing-function-and-its-application-to-aliasing-free-glottal-source-models-for-speech-and-singing-synthesis-1702.06724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-cosine-series-antialiasing-function-and-its-application-to-aliasing-free-glottal-source-models-for-speech-and-singing-synthesis-1702.06724"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-performance-evaluation-of-squash-players-1704.08765</loc><lastmod>2018-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-performance-evaluation-of-squash-players-1704.08765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-performance-evaluation-of-squash-players-1704.08765"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-uniform-and-random-sampling-for-speech-and-music-signals-1705.01457</loc><lastmod>2017-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-uniform-and-random-sampling-for-speech-and-music-signals-1705.01457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-uniform-and-random-sampling-for-speech-and-music-signals-1705.01457"/></url>
<url><loc>https://scifaro.com/en/abs/a-modulation-property-of-time-frequency-derivatives-of-filtered-phase-and-its-application-to-aperiodicity-and-fo-estimation-1706.02964</loc><lastmod>2018-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-modulation-property-of-time-frequency-derivatives-of-filtered-phase-and-its-application-to-aperiodicity-and-fo-estimation-1706.02964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-modulation-property-of-time-frequency-derivatives-of-filtered-phase-and-its-application-to-aperiodicity-and-fo-estimation-1706.02964"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-organisation-and-quality-analysis-of-user-generated-content-with-audio-fingerprinting-1708.05291</loc><lastmod>2017-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-organisation-and-quality-analysis-of-user-generated-content-with-audio-fingerprinting-1708.05291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-organisation-and-quality-analysis-of-user-generated-content-with-audio-fingerprinting-1708.05291"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-organisation-segmentation-and-filtering-of-user-generated-audio-content-1708.05302</loc><lastmod>2017-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-organisation-segmentation-and-filtering-of-user-generated-audio-content-1708.05302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-organisation-segmentation-and-filtering-of-user-generated-audio-content-1708.05302"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-generative-adversarial-networks-for-speech-enhancement-and-noise-robust-speaker-verification-1709.01703</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-generative-adversarial-networks-for-speech-enhancement-and-noise-robust-speaker-verification-1709.01703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-generative-adversarial-networks-for-speech-enhancement-and-noise-robust-speaker-verification-1709.01703"/></url>
<url><loc>https://scifaro.com/en/abs/musegan-multi-track-sequential-generative-adversarial-networks-for-symbolic-music-generation-and-accompaniment-1709.06298</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musegan-multi-track-sequential-generative-adversarial-networks-for-symbolic-music-generation-and-accompaniment-1709.06298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musegan-multi-track-sequential-generative-adversarial-networks-for-symbolic-music-generation-and-accompaniment-1709.06298"/></url>
<url><loc>https://scifaro.com/en/abs/broadband-multizone-sound-rendering-by-jointly-optimizing-the-sound-pressure-and-particle-velocity-1709.07269</loc><lastmod>2018-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/broadband-multizone-sound-rendering-by-jointly-optimizing-the-sound-pressure-and-particle-velocity-1709.07269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/broadband-multizone-sound-rendering-by-jointly-optimizing-the-sound-pressure-and-particle-velocity-1709.07269"/></url>
<url><loc>https://scifaro.com/en/abs/utd-crss-submission-for-mgb-3-arabic-dialect-identification-front-end-and-back-end-advancements-on-broadcast-speech-1710.00113</loc><lastmod>2017-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utd-crss-submission-for-mgb-3-arabic-dialect-identification-front-end-and-back-end-advancements-on-broadcast-speech-1710.00113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utd-crss-submission-for-mgb-3-arabic-dialect-identification-front-end-and-back-end-advancements-on-broadcast-speech-1710.00113"/></url>
<url><loc>https://scifaro.com/en/abs/plda-based-diarization-of-telephone-conversations-1710.00116</loc><lastmod>2017-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plda-based-diarization-of-telephone-conversations-1710.00116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plda-based-diarization-of-telephone-conversations-1710.00116"/></url>
<url><loc>https://scifaro.com/en/abs/head-shadow-enhancement-with-low-frequency-beamforming-improves-sound-localization-and-speech-perception-for-simulated-bimodal-listeners-1710.01904</loc><lastmod>2018-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/head-shadow-enhancement-with-low-frequency-beamforming-improves-sound-localization-and-speech-perception-for-simulated-bimodal-listeners-1710.01904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/head-shadow-enhancement-with-low-frequency-beamforming-improves-sound-localization-and-speech-perception-for-simulated-bimodal-listeners-1710.01904"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-dnn-based-speaker-recognition-inspired-by-i-vector-and-plda-1710.02369</loc><lastmod>2018-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-dnn-based-speaker-recognition-inspired-by-i-vector-and-plda-1710.02369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-dnn-based-speaker-recognition-inspired-by-i-vector-and-plda-1710.02369"/></url>
<url><loc>https://scifaro.com/en/abs/the-dirha-english-corpus-and-related-tasks-for-distant-speech-recognition-in-domestic-environments-1710.02560</loc><lastmod>2017-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dirha-english-corpus-and-related-tasks-for-distant-speech-recognition-in-domestic-environments-1710.02560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dirha-english-corpus-and-related-tasks-for-distant-speech-recognition-in-domestic-environments-1710.02560"/></url>
<url><loc>https://scifaro.com/en/abs/contaminated-speech-training-methods-for-robust-dnn-hmm-distant-speech-recognition-1710.03538</loc><lastmod>2017-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contaminated-speech-training-methods-for-robust-dnn-hmm-distant-speech-recognition-1710.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contaminated-speech-training-methods-for-robust-dnn-hmm-distant-speech-recognition-1710.03538"/></url>
<url><loc>https://scifaro.com/en/abs/prose-perceptual-risk-optimization-for-speech-enhancement-1710.03975</loc><lastmod>2017-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prose-perceptual-risk-optimization-for-speech-enhancement-1710.03975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prose-perceptual-risk-optimization-for-speech-enhancement-1710.03975"/></url>
<url><loc>https://scifaro.com/en/abs/audio-concept-classification-with-hierarchical-deep-neural-networks-1710.04288</loc><lastmod>2017-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-concept-classification-with-hierarchical-deep-neural-networks-1710.04288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-concept-classification-with-hierarchical-deep-neural-networks-1710.04288"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-conditioning-of-the-spherical-harmonic-matrix-for-spatial-audio-applications-1710.08633</loc><lastmod>2018-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-conditioning-of-the-spherical-harmonic-matrix-for-spatial-audio-applications-1710.08633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-conditioning-of-the-spherical-harmonic-matrix-for-spatial-audio-applications-1710.08633"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-landmarks-contain-more-information-about-the-phone-string-than-other-frames-for-automatic-speech-recognition-with-deep-neural-network-acoustic-model-1710.09985</loc><lastmod>2018-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-landmarks-contain-more-information-about-the-phone-string-than-other-frames-for-automatic-speech-recognition-with-deep-neural-network-acoustic-model-1710.09985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-landmarks-contain-more-information-about-the-phone-string-than-other-frames-for-automatic-speech-recognition-with-deep-neural-network-acoustic-model-1710.09985"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-tracking-and-separating-speech-sources-using-multiple-features-and-the-generalized-labeled-multi-bernoulli-framework-1710.10432</loc><lastmod>2018-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-tracking-and-separating-speech-sources-using-multiple-features-and-the-generalized-labeled-multi-bernoulli-framework-1710.10432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-tracking-and-separating-speech-sources-using-multiple-features-and-the-generalized-labeled-multi-bernoulli-framework-1710.10432"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-end-to-end-loss-for-speaker-verification-1710.10467</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-end-to-end-loss-for-speaker-verification-1710.10467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-end-to-end-loss-for-speaker-verification-1710.10467"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-with-lstm-1710.10468</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-with-lstm-1710.10468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-with-lstm-1710.10468"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-models-for-text-dependent-speaker-verification-1710.10470</loc><lastmod>2018-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-models-for-text-dependent-speaker-verification-1710.10470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-models-for-text-dependent-speaker-verification-1710.10470"/></url>
<url><loc>https://scifaro.com/en/abs/nebula-f0-estimation-and-voicing-detection-by-modeling-the-statistical-properties-of-feature-extractors-1710.11317</loc><lastmod>2018-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nebula-f0-estimation-and-voicing-detection-by-modeling-the-statistical-properties-of-feature-extractors-1710.11317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nebula-f0-estimation-and-voicing-detection-by-modeling-the-statistical-properties-of-feature-extractors-1710.11317"/></url>
<url><loc>https://scifaro.com/en/abs/robust-expectation-maximization-algorithm-for-doa-estimation-of-acoustic-sources-in-the-spherical-harmonic-domain-1711.01583</loc><lastmod>2017-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-expectation-maximization-algorithm-for-doa-estimation-of-acoustic-sources-in-the-spherical-harmonic-domain-1711.01583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-expectation-maximization-algorithm-for-doa-estimation-of-acoustic-sources-in-the-spherical-harmonic-domain-1711.01583"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-speech-recognition-with-a-single-end-to-end-model-1711.01694</loc><lastmod>2018-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-speech-recognition-with-a-single-end-to-end-model-1711.01694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-speech-recognition-with-a-single-end-to-end-model-1711.01694"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-phase-hrtf-modeling-of-pinna-spectral-notches-using-group-delay-decomposition-1711.01872</loc><lastmod>2018-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-phase-hrtf-modeling-of-pinna-spectral-notches-using-group-delay-decomposition-1711.01872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-phase-hrtf-modeling-of-pinna-spectral-notches-using-group-delay-decomposition-1711.01872"/></url>
<url><loc>https://scifaro.com/en/abs/deep-networks-tag-the-location-of-bird-vocalisations-on-audio-spectrograms-1711.04347</loc><lastmod>2017-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-networks-tag-the-location-of-bird-vocalisations-on-audio-spectrograms-1711.04347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-networks-tag-the-location-of-bird-vocalisations-on-audio-spectrograms-1711.04347"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-alarm-sounds-in-a-noisy-hospital-environment-using-model-and-non-model-based-approaches-1711.04351</loc><lastmod>2017-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-alarm-sounds-in-a-noisy-hospital-environment-using-model-and-non-model-based-approaches-1711.04351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-alarm-sounds-in-a-noisy-hospital-environment-using-model-and-non-model-based-approaches-1711.04351"/></url>
<url><loc>https://scifaro.com/en/abs/phonemic-and-graphemic-multilingual-ctc-based-speech-recognition-1711.04564</loc><lastmod>2017-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonemic-and-graphemic-multilingual-ctc-based-speech-recognition-1711.04564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonemic-and-graphemic-multilingual-ctc-based-speech-recognition-1711.04564"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-adaptation-of-rnn-based-asr-systems-1711.04569</loc><lastmod>2018-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-adaptation-of-rnn-based-asr-systems-1711.04569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-adaptation-of-rnn-based-asr-systems-1711.04569"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-conflict-detection-in-police-body-worn-audio-1711.05355</loc><lastmod>2018-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-conflict-detection-in-police-body-worn-audio-1711.05355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-conflict-detection-in-police-body-worn-audio-1711.05355"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-in-synthetic-audio-analysis-of-the-dcase-2016-task-results-1711.05551</loc><lastmod>2017-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-in-synthetic-audio-analysis-of-the-dcase-2016-task-results-1711.05551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-in-synthetic-audio-analysis-of-the-dcase-2016-task-results-1711.05551"/></url>
<url><loc>https://scifaro.com/en/abs/deep-long-short-term-memory-adaptive-beamforming-networks-for-multichannel-robust-speech-recognition-1711.08016</loc><lastmod>2018-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-long-short-term-memory-adaptive-beamforming-networks-for-multichannel-robust-speech-recognition-1711.08016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-long-short-term-memory-adaptive-beamforming-networks-for-multichannel-robust-speech-recognition-1711.08016"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-correction-using-canonical-time-warping-1711.08600</loc><lastmod>2017-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-correction-using-canonical-time-warping-1711.08600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-correction-using-canonical-time-warping-1711.08600"/></url>
<url><loc>https://scifaro.com/en/abs/realistic-multi-microphone-data-simulation-for-distant-speech-recognition-1711.09470</loc><lastmod>2017-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realistic-multi-microphone-data-simulation-for-distant-speech-recognition-1711.09470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realistic-multi-microphone-data-simulation-for-distant-speech-recognition-1711.09470"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-training-and-cross-lingual-adaptation-on-ctc-based-acoustic-model-1711.10025</loc><lastmod>2018-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-training-and-cross-lingual-adaptation-on-ctc-based-acoustic-model-1711.10025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-training-and-cross-lingual-adaptation-on-ctc-based-acoustic-model-1711.10025"/></url>
<url><loc>https://scifaro.com/en/abs/raga-identification-using-repetitive-note-patterns-from-prescriptive-notations-of-carnatic-music-1711.11357</loc><lastmod>2017-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raga-identification-using-repetitive-note-patterns-from-prescriptive-notations-of-carnatic-music-1711.11357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raga-identification-using-repetitive-note-patterns-from-prescriptive-notations-of-carnatic-music-1711.11357"/></url>
<url><loc>https://scifaro.com/en/abs/wavenet-based-low-rate-speech-coding-1712.01120</loc><lastmod>2017-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavenet-based-low-rate-speech-coding-1712.01120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavenet-based-low-rate-speech-coding-1712.01120"/></url>
<url><loc>https://scifaro.com/en/abs/precision-scaling-of-neural-networks-for-efficient-audio-processing-1712.01340</loc><lastmod>2017-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/precision-scaling-of-neural-networks-for-efficient-audio-processing-1712.01340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/precision-scaling-of-neural-networks-for-efficient-audio-processing-1712.01340"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dialect-speech-recognition-with-a-single-sequence-to-sequence-model-1712.01541</loc><lastmod>2017-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dialect-speech-recognition-with-a-single-sequence-to-sequence-model-1712.01541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dialect-speech-recognition-with-a-single-sequence-to-sequence-model-1712.01541"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-recognition-in-cocktail-party-problem-1712.01742</loc><lastmod>2017-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-recognition-in-cocktail-party-problem-1712.01742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-recognition-in-cocktail-party-problem-1712.01742"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-incorporating-an-external-language-model-into-a-sequence-to-sequence-model-1712.01996</loc><lastmod>2017-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-incorporating-an-external-language-model-into-a-sequence-to-sequence-model-1712.01996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-incorporating-an-external-language-model-into-a-sequence-to-sequence-model-1712.01996"/></url>
<url><loc>https://scifaro.com/en/abs/on-musical-onset-detection-via-the-s-transform-1712.02567</loc><lastmod>2018-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-musical-onset-detection-via-the-s-transform-1712.02567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-musical-onset-detection-via-the-s-transform-1712.02567"/></url>
<url><loc>https://scifaro.com/en/abs/classification-vs-regression-in-supervised-learning-for-single-channel-speaker-count-estimation-1712.04555</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-vs-regression-in-supervised-learning-for-single-channel-speaker-count-estimation-1712.04555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-vs-regression-in-supervised-learning-for-single-channel-speaker-count-estimation-1712.04555"/></url>
<url><loc>https://scifaro.com/en/abs/learning-spontaneity-to-improve-emotion-recognition-in-speech-1712.04753</loc><lastmod>2018-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-spontaneity-to-improve-emotion-recognition-in-speech-1712.04753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-spontaneity-to-improve-emotion-recognition-in-speech-1712.04753"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-a-spectral-glottal-model-for-the-source-filter-separation-of-speech-1712.08034</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-a-spectral-glottal-model-for-the-source-filter-separation-of-speech-1712.08034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-a-spectral-glottal-model-for-the-source-filter-separation-of-speech-1712.08034"/></url>
<url><loc>https://scifaro.com/en/abs/overcomplete-frame-thresholding-for-acoustic-scene-analysis-1712.09117</loc><lastmod>2017-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overcomplete-frame-thresholding-for-acoustic-scene-analysis-1712.09117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overcomplete-frame-thresholding-for-acoustic-scene-analysis-1712.09117"/></url>
<url><loc>https://scifaro.com/en/abs/audio-to-body-dynamics-1712.09382</loc><lastmod>2020-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-to-body-dynamics-1712.09382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-to-body-dynamics-1712.09382"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-analysis-for-nonstationary-audio-1712.10252</loc><lastmod>2018-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-analysis-for-nonstationary-audio-1712.10252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-analysis-for-nonstationary-audio-1712.10252"/></url>
<url><loc>https://scifaro.com/en/abs/logarithmic-frequency-scaling-and-consistent-frequency-coverage-for-the-selection-of-auditory-filterbank-center-frequencies-1801.00075</loc><lastmod>2018-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/logarithmic-frequency-scaling-and-consistent-frequency-coverage-for-the-selection-of-auditory-filterbank-center-frequencies-1801.00075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/logarithmic-frequency-scaling-and-consistent-frequency-coverage-for-the-selection-of-auditory-filterbank-center-frequencies-1801.00075"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-with-one-microphone-a-few-legos-and-non-negative-matrix-factorization-1801.03740</loc><lastmod>2018-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-with-one-microphone-a-few-legos-and-non-negative-matrix-factorization-1801.03740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-with-one-microphone-a-few-legos-and-non-negative-matrix-factorization-1801.03740"/></url>
<url><loc>https://scifaro.com/en/abs/informed-group-sparse-representation-for-singing-voice-separation-1801.03815</loc><lastmod>2018-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/informed-group-sparse-representation-for-singing-voice-separation-1801.03815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/informed-group-sparse-representation-for-singing-voice-separation-1801.03815"/></url>
<url><loc>https://scifaro.com/en/abs/epoch-synchronous-overlap-add-esola-for-time-and-pitch-scale-modification-of-speech-signals-1801.06492</loc><lastmod>2018-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/epoch-synchronous-overlap-add-esola-for-time-and-pitch-scale-modification-of-speech-signals-1801.06492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/epoch-synchronous-overlap-add-esola-for-time-and-pitch-scale-modification-of-speech-signals-1801.06492"/></url>
<url><loc>https://scifaro.com/en/abs/highly-reverberant-real-environment-database-hrre-1801.09651</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/highly-reverberant-real-environment-database-hrre-1801.09651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/highly-reverberant-real-environment-database-hrre-1801.09651"/></url>
<url><loc>https://scifaro.com/en/abs/a-divide-and-conquer-strategy-for-musical-noise-free-speech-enhancement-in-adverse-environments-1802.02665</loc><lastmod>2018-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-divide-and-conquer-strategy-for-musical-noise-free-speech-enhancement-in-adverse-environments-1802.02665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-divide-and-conquer-strategy-for-musical-noise-free-speech-enhancement-in-adverse-environments-1802.02665"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-teager-energy-operated-perceptual-wavelet-packet-coefficients-with-an-erlang-2-pdf-for-real-time-enhancement-of-noisy-speech-1802.03472</loc><lastmod>2018-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-teager-energy-operated-perceptual-wavelet-packet-coefficients-with-an-erlang-2-pdf-for-real-time-enhancement-of-noisy-speech-1802.03472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-teager-energy-operated-perceptual-wavelet-packet-coefficients-with-an-erlang-2-pdf-for-real-time-enhancement-of-noisy-speech-1802.03472"/></url>
<url><loc>https://scifaro.com/en/abs/phased-microphone-array-for-sound-source-localization-with-deep-learning-1802.04479</loc><lastmod>2018-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phased-microphone-array-for-sound-source-localization-with-deep-learning-1802.04479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phased-microphone-array-for-sound-source-localization-with-deep-learning-1802.04479"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-noisy-speech-with-low-speech-distortion-based-on-probabilistic-geometric-spectral-subtraction-1802.05125</loc><lastmod>2018-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-with-low-speech-distortion-based-on-probabilistic-geometric-spectral-subtraction-1802.05125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-with-low-speech-distortion-based-on-probabilistic-geometric-spectral-subtraction-1802.05125"/></url>
<url><loc>https://scifaro.com/en/abs/close-miking-empirical-practice-verification-a-source-separation-approach-1802.05132</loc><lastmod>2018-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/close-miking-empirical-practice-verification-a-source-separation-approach-1802.05132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/close-miking-empirical-practice-verification-a-source-separation-approach-1802.05132"/></url>
<url><loc>https://scifaro.com/en/abs/joint-estimation-of-room-geometry-and-modes-with-compressed-sensing-1802.05879</loc><lastmod>2018-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-estimation-of-room-geometry-and-modes-with-compressed-sensing-1802.05879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-estimation-of-room-geometry-and-modes-with-compressed-sensing-1802.05879"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-an-exponential-model-based-threshold-and-a-custom-thresholding-function-in-perceptual-wavelet-packet-domain-1802.05962</loc><lastmod>2018-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-an-exponential-model-based-threshold-and-a-custom-thresholding-function-in-perceptual-wavelet-packet-domain-1802.05962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-an-exponential-model-based-threshold-and-a-custom-thresholding-function-in-perceptual-wavelet-packet-domain-1802.05962"/></url>
<url><loc>https://scifaro.com/en/abs/crepe-a-convolutional-representation-for-pitch-estimation-1802.06182</loc><lastmod>2018-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crepe-a-convolutional-representation-for-pitch-estimation-1802.06182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crepe-a-convolutional-representation-for-pitch-estimation-1802.06182"/></url>
<url><loc>https://scifaro.com/en/abs/rls-based-adaptive-dereverberation-tracing-abrupt-position-change-of-target-speaker-1802.08997</loc><lastmod>2018-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rls-based-adaptive-dereverberation-tracing-abrupt-position-change-of-target-speaker-1802.08997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rls-based-adaptive-dereverberation-tracing-abrupt-position-change-of-target-speaker-1802.08997"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-trinicon-based-blind-source-separation-method-with-multi-source-activity-detection-for-sparsely-mixed-signals-1802.09005</loc><lastmod>2018-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-trinicon-based-blind-source-separation-method-with-multi-source-activity-detection-for-sparsely-mixed-signals-1802.09005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-trinicon-based-blind-source-separation-method-with-multi-source-activity-detection-for-sparsely-mixed-signals-1802.09005"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-source-separation-based-on-simplex-analysis-1802.09221</loc><lastmod>2018-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-source-separation-based-on-simplex-analysis-1802.09221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-source-separation-based-on-simplex-analysis-1802.09221"/></url>
<url><loc>https://scifaro.com/en/abs/pop-music-highlighter-marking-the-emotion-keypoints-1802.10495</loc><lastmod>2018-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pop-music-highlighter-marking-the-emotion-keypoints-1802.10495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pop-music-highlighter-marking-the-emotion-keypoints-1802.10495"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-in-adverse-environments-based-on-non-stationary-noise-driven-spectral-subtraction-and-snr-dependent-phase-compensation-1803.00396</loc><lastmod>2018-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-in-adverse-environments-based-on-non-stationary-noise-driven-spectral-subtraction-and-snr-dependent-phase-compensation-1803.00396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-in-adverse-environments-based-on-non-stationary-noise-driven-spectral-subtraction-and-snr-dependent-phase-compensation-1803.00396"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-steal-your-vocal-identity-from-the-internet-initial-investigation-of-cloning-obama-s-voice-using-gan-wavenet-and-low-quality-found-data-1803.00860</loc><lastmod>2018-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-steal-your-vocal-identity-from-the-internet-initial-investigation-of-cloning-obama-s-voice-using-gan-wavenet-and-low-quality-found-data-1803.00860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-steal-your-vocal-identity-from-the-internet-initial-investigation-of-cloning-obama-s-voice-using-gan-wavenet-and-low-quality-found-data-1803.00860"/></url>
<url><loc>https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1803.00886</loc><lastmod>2018-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1803.00886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-factorization-for-speech-signal-1803.00886"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-framework-of-voice-based-emotion-recognition-system-for-films-and-tv-programs-1803.01122</loc><lastmod>2018-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-framework-of-voice-based-emotion-recognition-system-for-films-and-tv-programs-1803.01122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-framework-of-voice-based-emotion-recognition-system-for-films-and-tv-programs-1803.01122"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-a-gaussian-modeling-based-threshold-and-a-pdf-dependent-thresholding-function-1803.01841</loc><lastmod>2018-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-a-gaussian-modeling-based-threshold-and-a-pdf-dependent-thresholding-function-1803.01841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-noisy-speech-exploiting-a-gaussian-modeling-based-threshold-and-a-pdf-dependent-thresholding-function-1803.01841"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-attention-model-for-weakly-supervised-audio-classification-1803.02353</loc><lastmod>2018-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-attention-model-for-weakly-supervised-audio-classification-1803.02353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-attention-model-for-weakly-supervised-audio-classification-1803.02353"/></url>
<url><loc>https://scifaro.com/en/abs/linear-networks-based-speaker-adaptation-for-speech-synthesis-1803.02445</loc><lastmod>2018-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linear-networks-based-speaker-adaptation-for-speech-synthesis-1803.02445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linear-networks-based-speaker-adaptation-for-speech-synthesis-1803.02445"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-based-on-non-stationary-noise-driven-geometric-spectral-subtraction-and-phase-spectrum-compensation-1803.02870</loc><lastmod>2018-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-based-on-non-stationary-noise-driven-geometric-spectral-subtraction-and-phase-spectrum-compensation-1803.02870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-based-on-non-stationary-noise-driven-geometric-spectral-subtraction-and-phase-spectrum-compensation-1803.02870"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-singing-f0-with-neural-network-driven-transition-sustain-models-1803.04030</loc><lastmod>2018-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-singing-f0-with-neural-network-driven-transition-sustain-models-1803.04030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-singing-f0-with-neural-network-driven-transition-sustain-models-1803.04030"/></url>
<url><loc>https://scifaro.com/en/abs/deep-cnn-based-feature-extractor-for-text-prompted-speaker-recognition-1803.05307</loc><lastmod>2018-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-cnn-based-feature-extractor-for-text-prompted-speaker-recognition-1803.05307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-cnn-based-feature-extractor-for-text-prompted-speaker-recognition-1803.05307"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-using-convolutional-neural-networks-1803.05427</loc><lastmod>2018-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-using-convolutional-neural-networks-1803.05427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-using-convolutional-neural-networks-1803.05427"/></url>
<url><loc>https://scifaro.com/en/abs/directional-emphasis-in-ambisonics-1803.06718</loc><lastmod>2018-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-emphasis-in-ambisonics-1803.06718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-emphasis-in-ambisonics-1803.06718"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-using-fully-convolutional-networks-1803.08243</loc><lastmod>2019-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-using-fully-convolutional-networks-1803.08243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-using-fully-convolutional-networks-1803.08243"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-robustness-of-features-and-enhancement-on-speech-recognition-systems-in-highly-reverberant-real-environments-1803.09013</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-robustness-of-features-and-enhancement-on-speech-recognition-systems-in-highly-reverberant-real-environments-1803.09013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-robustness-of-features-and-enhancement-on-speech-recognition-systems-in-highly-reverberant-real-environments-1803.09013"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-dnn-based-spectral-feature-mapping-that-removes-noise-and-reverberation-for-robust-automatic-speech-recognition-1803.09016</loc><lastmod>2018-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-dnn-based-spectral-feature-mapping-that-removes-noise-and-reverberation-for-robust-automatic-speech-recognition-1803.09016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-dnn-based-spectral-feature-mapping-that-removes-noise-and-reverberation-for-robust-automatic-speech-recognition-1803.09016"/></url>
<url><loc>https://scifaro.com/en/abs/complex-valued-restricted-boltzmann-machine-for-direct-speech-parameterization-from-complex-spectra-1803.09946</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-valued-restricted-boltzmann-machine-for-direct-speech-parameterization-from-complex-spectra-1803.09946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-valued-restricted-boltzmann-machine-for-direct-speech-parameterization-from-complex-spectra-1803.09946"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-minimisation-of-masking-in-multitrack-audio-using-subgroups-1803.09960</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-minimisation-of-masking-in-multitrack-audio-using-subgroups-1803.09960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-minimisation-of-masking-in-multitrack-audio-using-subgroups-1803.09960"/></url>
<url><loc>https://scifaro.com/en/abs/student-teacher-learning-for-blstm-mask-based-speech-enhancement-1803.10013</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/student-teacher-learning-for-blstm-mask-based-speech-enhancement-1803.10013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/student-teacher-learning-for-blstm-mask-based-speech-enhancement-1803.10013"/></url>
<url><loc>https://scifaro.com/en/abs/comprehending-real-numbers-development-of-bengali-real-number-speech-corpus-1803.10136</loc><lastmod>2018-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comprehending-real-numbers-development-of-bengali-real-number-speech-corpus-1803.10136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comprehending-real-numbers-development-of-bengali-real-number-speech-corpus-1803.10136"/></url>
<url><loc>https://scifaro.com/en/abs/light-gated-recurrent-units-for-speech-recognition-1803.10225</loc><lastmod>2018-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/light-gated-recurrent-units-for-speech-recognition-1803.10225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/light-gated-recurrent-units-for-speech-recognition-1803.10225"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-statistics-pooling-for-deep-speaker-embedding-1803.10963</loc><lastmod>2019-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-statistics-pooling-for-deep-speaker-embedding-1803.10963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-statistics-pooling-for-deep-speaker-embedding-1803.10963"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-alzheimer-s-disease-using-gated-convolutional-neural-network-from-audio-data-1803.11344</loc><lastmod>2018-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-alzheimer-s-disease-using-gated-convolutional-neural-network-from-audio-data-1803.11344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-alzheimer-s-disease-using-gated-convolutional-neural-network-from-audio-data-1803.11344"/></url>
<url><loc>https://scifaro.com/en/abs/i-vector-transformation-using-conditional-generative-adversarial-networks-for-short-utterance-speaker-verification-1804.00290</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-vector-transformation-using-conditional-generative-adversarial-networks-for-short-utterance-speaker-verification-1804.00290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-vector-transformation-using-conditional-generative-adversarial-networks-for-short-utterance-speaker-verification-1804.00290"/></url>
<url><loc>https://scifaro.com/en/abs/insights-into-end-to-end-learning-scheme-for-language-identification-1804.00381</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/insights-into-end-to-end-learning-scheme-for-language-identification-1804.00381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/insights-into-end-to-end-learning-scheme-for-language-identification-1804.00381"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-learnable-dictionary-encoding-layer-for-end-to-end-language-identification-1804.00385</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-learnable-dictionary-encoding-layer-for-end-to-end-language-identification-1804.00385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-learnable-dictionary-encoding-layer-for-end-to-end-language-identification-1804.00385"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-nonparallel-voice-conversion-based-on-cycle-consistent-adversarial-network-1804.00425</loc><lastmod>2018-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-nonparallel-voice-conversion-based-on-cycle-consistent-adversarial-network-1804.00425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-nonparallel-voice-conversion-based-on-cycle-consistent-adversarial-network-1804.00425"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-teacher-student-learning-for-unsupervised-domain-adaptation-1804.00644</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-teacher-student-learning-for-unsupervised-domain-adaptation-1804.00644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-teacher-student-learning-for-unsupervised-domain-adaptation-1804.00644"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-invariant-training-via-adversarial-learning-1804.00732</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-invariant-training-via-adversarial-learning-1804.00732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-invariant-training-via-adversarial-learning-1804.00732"/></url>
<url><loc>https://scifaro.com/en/abs/speech-waveform-synthesis-from-mfcc-sequences-with-generative-adversarial-networks-1804.00920</loc><lastmod>2018-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-waveform-synthesis-from-mfcc-sequences-with-generative-adversarial-networks-1804.00920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-waveform-synthesis-from-mfcc-sequences-with-generative-adversarial-networks-1804.00920"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-recent-waveform-generation-and-acoustic-modeling-methods-for-neural-network-based-speech-synthesis-1804.02549</loc><lastmod>2018-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-recent-waveform-generation-and-acoustic-modeling-methods-for-neural-network-based-speech-synthesis-1804.02549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-recent-waveform-generation-and-acoustic-modeling-methods-for-neural-network-based-speech-synthesis-1804.02549"/></url>
<url><loc>https://scifaro.com/en/abs/multi-target-voice-conversion-without-parallel-data-by-adversarially-learning-disentangled-audio-representations-1804.02812</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-target-voice-conversion-without-parallel-data-by-adversarially-learning-disentangled-audio-representations-1804.02812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-target-voice-conversion-without-parallel-data-by-adversarially-learning-disentangled-audio-representations-1804.02812"/></url>
<url><loc>https://scifaro.com/en/abs/the-voice-conversion-challenge-2018-promoting-development-of-parallel-and-nonparallel-methods-1804.04262</loc><lastmod>2018-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voice-conversion-challenge-2018-promoting-development-of-parallel-and-nonparallel-methods-1804.04262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voice-conversion-challenge-2018-promoting-development-of-parallel-and-nonparallel-methods-1804.04262"/></url>
<url><loc>https://scifaro.com/en/abs/global-snr-estimation-of-speech-signals-using-entropy-and-uncertainty-estimates-from-dropout-networks-1804.04353</loc><lastmod>2018-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-snr-estimation-of-speech-signals-using-entropy-and-uncertainty-estimates-from-dropout-networks-1804.04353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-snr-estimation-of-speech-signals-using-entropy-and-uncertainty-estimates-from-dropout-networks-1804.04353"/></url>
<url><loc>https://scifaro.com/en/abs/language-recognition-using-time-delay-deep-neural-network-1804.05000</loc><lastmod>2018-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-recognition-using-time-delay-deep-neural-network-1804.05000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-recognition-using-time-delay-deep-neural-network-1804.05000"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-encoding-layer-and-loss-function-in-end-to-end-speaker-and-language-recognition-system-1804.05160</loc><lastmod>2018-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-encoding-layer-and-loss-function-in-end-to-end-speaker-and-language-recognition-system-1804.05160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-encoding-layer-and-loss-function-in-end-to-end-speaker-and-language-recognition-system-1804.05160"/></url>
<url><loc>https://scifaro.com/en/abs/twin-regularization-for-online-speech-recognition-1804.05374</loc><lastmod>2018-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/twin-regularization-for-online-speech-recognition-1804.05374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/twin-regularization-for-online-speech-recognition-1804.05374"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-throat-microphone-recordings-using-gaussian-mixture-model-probabilistic-estimator-1804.05937</loc><lastmod>2018-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-throat-microphone-recordings-using-gaussian-mixture-model-probabilistic-estimator-1804.05937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-throat-microphone-recordings-using-gaussian-mixture-model-probabilistic-estimator-1804.05937"/></url>
<url><loc>https://scifaro.com/en/abs/precise-detection-of-speech-endpoints-dynamically-a-wavelet-convolution-based-approach-1804.06159</loc><lastmod>2018-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/precise-detection-of-speech-endpoints-dynamically-a-wavelet-convolution-based-approach-1804.06159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/precise-detection-of-speech-endpoints-dynamically-a-wavelet-convolution-based-approach-1804.06159"/></url>
<url><loc>https://scifaro.com/en/abs/the-2018-signal-separation-evaluation-campaign-1804.06267</loc><lastmod>2018-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-2018-signal-separation-evaluation-campaign-1804.06267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-2018-signal-separation-evaluation-campaign-1804.06267"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adversarial-for-acoustic-emotion-recognition-1804.07690</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adversarial-for-acoustic-emotion-recognition-1804.07690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adversarial-for-acoustic-emotion-recognition-1804.07690"/></url>
<url><loc>https://scifaro.com/en/abs/a-spoofing-benchmark-for-the-2018-voice-conversion-challenge-leveraging-from-spoofing-countermeasures-for-speech-artifact-assessment-1804.08438</loc><lastmod>2018-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-spoofing-benchmark-for-the-2018-voice-conversion-challenge-leveraging-from-spoofing-countermeasures-for-speech-artifact-assessment-1804.08438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-spoofing-benchmark-for-the-2018-voice-conversion-challenge-leveraging-from-spoofing-countermeasures-for-speech-artifact-assessment-1804.08438"/></url>
<url><loc>https://scifaro.com/en/abs/a-discriminative-acoustic-prosodic-approach-for-measuring-local-entrainment-1804.08663</loc><lastmod>2018-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-discriminative-acoustic-prosodic-approach-for-measuring-local-entrainment-1804.08663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-discriminative-acoustic-prosodic-approach-for-measuring-local-entrainment-1804.08663"/></url>
<url><loc>https://scifaro.com/en/abs/towards-an-unsupervised-entrainment-distance-in-conversational-speech-using-deep-neural-networks-1804.08782</loc><lastmod>2019-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-an-unsupervised-entrainment-distance-in-conversational-speech-using-deep-neural-networks-1804.08782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-an-unsupervised-entrainment-distance-in-conversational-speech-using-deep-neural-networks-1804.08782"/></url>
<url><loc>https://scifaro.com/en/abs/recent-progresses-in-deep-learning-based-acoustic-models-updated-1804.09298</loc><lastmod>2018-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-progresses-in-deep-learning-based-acoustic-models-updated-1804.09298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-progresses-in-deep-learning-based-acoustic-models-updated-1804.09298"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-raw-waveform-model-for-glottal-excitation-1804.09593</loc><lastmod>2018-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-raw-waveform-model-for-glottal-excitation-1804.09593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-raw-waveform-model-for-glottal-excitation-1804.09593"/></url>
<url><loc>https://scifaro.com/en/abs/t-dcf-a-detection-cost-function-for-the-tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-1804.09618</loc><lastmod>2019-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-dcf-a-detection-cost-function-for-the-tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-1804.09618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-dcf-a-detection-cost-function-for-the-tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-1804.09618"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multimodal-speech-recognition-1804.09713</loc><lastmod>2018-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multimodal-speech-recognition-1804.09713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multimodal-speech-recognition-1804.09713"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-dysarthric-speech-for-training-data-augmentation-in-clinical-speech-applications-1804.10325</loc><lastmod>2018-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-dysarthric-speech-for-training-data-augmentation-in-clinical-speech-applications-1804.10325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-dysarthric-speech-for-training-data-augmentation-in-clinical-speech-applications-1804.10325"/></url>
<url><loc>https://scifaro.com/en/abs/syllable-based-sequence-to-sequence-speech-recognition-with-the-transformer-in-mandarin-chinese-1804.10752</loc><lastmod>2018-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syllable-based-sequence-to-sequence-speech-recognition-with-the-transformer-in-mandarin-chinese-1804.10752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syllable-based-sequence-to-sequence-speech-recognition-with-the-transformer-in-mandarin-chinese-1804.10752"/></url>
<url><loc>https://scifaro.com/en/abs/ladder-networks-for-emotion-recognition-using-unsupervised-auxiliary-tasks-to-improve-predictions-of-emotional-attributes-1804.10816</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ladder-networks-for-emotion-recognition-using-unsupervised-auxiliary-tasks-to-improve-predictions-of-emotional-attributes-1804.10816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ladder-networks-for-emotion-recognition-using-unsupervised-auxiliary-tasks-to-improve-predictions-of-emotional-attributes-1804.10816"/></url>
<url><loc>https://scifaro.com/en/abs/collapsed-speech-segment-detection-and-suppression-for-wavenet-vocoder-1804.11055</loc><lastmod>2018-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collapsed-speech-segment-detection-and-suppression-for-wavenet-vocoder-1804.11055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collapsed-speech-segment-detection-and-suppression-for-wavenet-vocoder-1804.11055"/></url>
<url><loc>https://scifaro.com/en/abs/text-independent-speaker-verification-using-long-short-term-memory-networks-1805.00604</loc><lastmod>2018-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-independent-speaker-verification-using-long-short-term-memory-networks-1805.00604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-independent-speaker-verification-using-long-short-term-memory-networks-1805.00604"/></url>
<url><loc>https://scifaro.com/en/abs/supervector-compression-strategies-to-speed-up-i-vector-system-development-1805.01156</loc><lastmod>2018-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervector-compression-strategies-to-speed-up-i-vector-system-development-1805.01156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervector-compression-strategies-to-speed-up-i-vector-system-development-1805.01156"/></url>
<url><loc>https://scifaro.com/en/abs/deep-denoising-for-hearing-aid-applications-1805.01198</loc><lastmod>2019-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-denoising-for-hearing-aid-applications-1805.01198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-denoising-for-hearing-aid-applications-1805.01198"/></url>
<url><loc>https://scifaro.com/en/abs/capsule-networks-for-low-resource-spoken-language-understanding-1805.02922</loc><lastmod>2018-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capsule-networks-for-low-resource-spoken-language-understanding-1805.02922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capsule-networks-for-low-resource-spoken-language-understanding-1805.02922"/></url>
<url><loc>https://scifaro.com/en/abs/a-regression-model-of-recurrent-deep-neural-networks-for-noise-robust-estimation-of-the-fundamental-frequency-contour-of-speech-1805.02958</loc><lastmod>2018-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-regression-model-of-recurrent-deep-neural-networks-for-noise-robust-estimation-of-the-fundamental-frequency-contour-of-speech-1805.02958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-regression-model-of-recurrent-deep-neural-networks-for-noise-robust-estimation-of-the-fundamental-frequency-contour-of-speech-1805.02958"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-from-adult-to-children-for-speech-recognition-evaluation-analysis-and-recommendations-1805.03322</loc><lastmod>2018-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-from-adult-to-children-for-speech-recognition-evaluation-analysis-and-recommendations-1805.03322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-from-adult-to-children-for-speech-recognition-evaluation-analysis-and-recommendations-1805.03322"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-modeling-units-in-sequence-to-sequence-speech-recognition-with-the-transformer-on-mandarin-chinese-1805.06239</loc><lastmod>2018-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-modeling-units-in-sequence-to-sequence-speech-recognition-with-the-transformer-on-mandarin-chinese-1805.06239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-modeling-units-in-sequence-to-sequence-speech-recognition-with-the-transformer-on-mandarin-chinese-1805.06239"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-learning-of-raw-speech-features-for-domain-invariant-speech-recognition-1805.08615</loc><lastmod>2018-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-learning-of-raw-speech-features-for-domain-invariant-speech-recognition-1805.08615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-learning-of-raw-speech-features-for-domain-invariant-speech-recognition-1805.08615"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-using-deep-belief-networks-1805.08865</loc><lastmod>2018-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-using-deep-belief-networks-1805.08865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-using-deep-belief-networks-1805.08865"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-convolutional-neural-network-based-end-to-end-replay-anti-spoofing-1805.09164</loc><lastmod>2018-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-convolutional-neural-network-based-end-to-end-replay-anti-spoofing-1805.09164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-convolutional-neural-network-based-end-to-end-replay-anti-spoofing-1805.09164"/></url>
<url><loc>https://scifaro.com/en/abs/asr-based-features-for-emotion-recognition-a-transfer-learning-approach-1805.09197</loc><lastmod>2018-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-based-features-for-emotion-recognition-a-transfer-learning-approach-1805.09197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-based-features-for-emotion-recognition-a-transfer-learning-approach-1805.09197"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-driven-facial-animation-with-temporal-gans-1805.09313</loc><lastmod>2018-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-driven-facial-animation-with-temporal-gans-1805.09313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-driven-facial-animation-with-temporal-gans-1805.09313"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-multi-channel-wind-noise-based-on-the-corcos-model-1805.09679</loc><lastmod>2019-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-multi-channel-wind-noise-based-on-the-corcos-model-1805.09679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-multi-channel-wind-noise-based-on-the-corcos-model-1805.09679"/></url>
<url><loc>https://scifaro.com/en/abs/relative-transfer-function-estimation-exploiting-spatially-separated-microphones-in-a-diffuse-noise-field-1805.10333</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relative-transfer-function-estimation-exploiting-spatially-separated-microphones-in-a-diffuse-noise-field-1805.10333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relative-transfer-function-estimation-exploiting-spatially-separated-microphones-in-a-diffuse-noise-field-1805.10333"/></url>
<url><loc>https://scifaro.com/en/abs/curriculum-learning-for-speech-emotion-recognition-from-crowdsourced-labels-1805.10339</loc><lastmod>2022-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/curriculum-learning-for-speech-emotion-recognition-from-crowdsourced-labels-1805.10339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/curriculum-learning-for-speech-emotion-recognition-from-crowdsourced-labels-1805.10339"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-context-window-composition-for-distant-speech-recognition-1805.10498</loc><lastmod>2018-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-context-window-composition-for-distant-speech-recognition-1805.10498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-context-window-composition-for-distant-speech-recognition-1805.10498"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-speaker-segmentation-and-diarization-using-lexical-and-acoustic-cues-via-sequence-to-sequence-neural-networks-1805.10731</loc><lastmod>2018-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-speaker-segmentation-and-diarization-using-lexical-and-acoustic-cues-via-sequence-to-sequence-neural-networks-1805.10731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-speaker-segmentation-and-diarization-using-lexical-and-acoustic-cues-via-sequence-to-sequence-neural-networks-1805.10731"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-pursuit-and-dictionary-learning-for-blind-source-separation-in-polyphonic-music-recordings-1806.00273</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-pursuit-and-dictionary-learning-for-blind-source-separation-in-polyphonic-music-recordings-1806.00273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-pursuit-and-dictionary-learning-for-blind-source-separation-in-polyphonic-music-recordings-1806.00273"/></url>
<url><loc>https://scifaro.com/en/abs/performance-based-cost-functions-for-end-to-end-speech-separation-1806.00511</loc><lastmod>2018-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-based-cost-functions-for-end-to-end-speech-separation-1806.00511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-based-cost-functions-for-end-to-end-speech-separation-1806.00511"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-speech-enhancement-for-unseen-noises-using-monte-carlo-dropout-1806.00516</loc><lastmod>2018-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-speech-enhancement-for-unseen-noises-using-monte-carlo-dropout-1806.00516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-speech-enhancement-for-unseen-noises-using-monte-carlo-dropout-1806.00516"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-length-normalization-in-end-to-end-speaker-verification-system-1806.03209</loc><lastmod>2018-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-length-normalization-in-end-to-end-speaker-verification-system-1806.03209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-length-normalization-in-end-to-end-speaker-verification-system-1806.03209"/></url>
<url><loc>https://scifaro.com/en/abs/angular-softmax-loss-for-end-to-end-speaker-verification-1806.03464</loc><lastmod>2018-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/angular-softmax-loss-for-end-to-end-speaker-verification-1806.03464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/angular-softmax-loss-for-end-to-end-speaker-verification-1806.03464"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoders-for-music-sound-modeling-a-comparison-of-linear-shallow-deep-recurrent-and-variational-models-1806.04096</loc><lastmod>2019-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoders-for-music-sound-modeling-a-comparison-of-linear-shallow-deep-recurrent-and-variational-models-1806.04096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoders-for-music-sound-modeling-a-comparison-of-linear-shallow-deep-recurrent-and-variational-models-1806.04096"/></url>
<url><loc>https://scifaro.com/en/abs/model-based-speech-enhancement-for-intelligibility-improvement-in-binaural-hearing-aids-1806.04885</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-based-speech-enhancement-for-intelligibility-improvement-in-binaural-hearing-aids-1806.04885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-based-speech-enhancement-for-intelligibility-improvement-in-binaural-hearing-aids-1806.04885"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-end-to-end-speech-recognition-with-a-single-transformer-on-low-resource-languages-1806.05059</loc><lastmod>2018-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-end-to-end-speech-recognition-with-a-single-transformer-on-low-resource-languages-1806.05059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-end-to-end-speech-recognition-with-a-single-transformer-on-low-resource-languages-1806.05059"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-networks-for-denoising-of-arbitrary-numbers-of-channels-1806.05296</loc><lastmod>2018-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-networks-for-denoising-of-arbitrary-numbers-of-channels-1806.05296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-networks-for-denoising-of-arbitrary-numbers-of-channels-1806.05296"/></url>
<url><loc>https://scifaro.com/en/abs/a-weighted-superposition-of-functional-contours-model-for-modelling-contextual-prominence-of-elementary-prosodic-contours-1806.06779</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-weighted-superposition-of-functional-contours-model-for-modelling-contextual-prominence-of-elementary-prosodic-contours-1806.06779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-weighted-superposition-of-functional-contours-model-for-modelling-contextual-prominence-of-elementary-prosodic-contours-1806.06779"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automated-single-channel-source-separation-using-neural-networks-1806.08086</loc><lastmod>2018-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automated-single-channel-source-separation-using-neural-networks-1806.08086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automated-single-channel-source-separation-using-neural-networks-1806.08086"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-wavenet-a-multi-task-generative-model-for-statistical-parametric-speech-synthesis-without-fundamental-frequency-conditions-1806.08619</loc><lastmod>2018-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-wavenet-a-multi-task-generative-model-for-statistical-parametric-speech-synthesis-without-fundamental-frequency-conditions-1806.08619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-wavenet-a-multi-task-generative-model-for-statistical-parametric-speech-synthesis-without-fundamental-frequency-conditions-1806.08619"/></url>
<url><loc>https://scifaro.com/en/abs/a-variational-prosody-model-for-mapping-the-context-sensitive-variation-of-functional-prosodic-prototypes-1806.08685</loc><lastmod>2019-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variational-prosody-model-for-mapping-the-context-sensitive-variation-of-functional-prosodic-prototypes-1806.08685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variational-prosody-model-for-mapping-the-context-sensitive-variation-of-functional-prosodic-prototypes-1806.08685"/></url>
<url><loc>https://scifaro.com/en/abs/perceptually-relevant-preservation-of-interaural-time-differences-in-binaural-hearing-aids-1806.09169</loc><lastmod>2018-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptually-relevant-preservation-of-interaural-time-differences-in-binaural-hearing-aids-1806.09169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptually-relevant-preservation-of-interaural-time-differences-in-binaural-hearing-aids-1806.09169"/></url>
<url><loc>https://scifaro.com/en/abs/emphasis-an-emotional-phoneme-based-acoustic-model-for-speech-synthesis-system-1806.09276</loc><lastmod>2018-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emphasis-an-emotional-phoneme-based-acoustic-model-for-speech-synthesis-system-1806.09276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emphasis-an-emotional-phoneme-based-acoustic-model-for-speech-synthesis-system-1806.09276"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-networks-to-enhance-coded-speech-1806.09411</loc><lastmod>2019-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-networks-to-enhance-coded-speech-1806.09411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-networks-to-enhance-coded-speech-1806.09411"/></url>
<url><loc>https://scifaro.com/en/abs/independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-1806.10307</loc><lastmod>2018-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-1806.10307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-deeply-learned-matrix-analysis-for-multichannel-audio-source-separation-1806.10307"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-with-deep-feature-losses-1806.10522</loc><lastmod>2018-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-with-deep-feature-losses-1806.10522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-with-deep-feature-losses-1806.10522"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-to-single-sinusoid-regression-to-estimate-the-f0-contour-from-noisy-speech-using-recurrent-deep-neural-networks-1807.00752</loc><lastmod>2018-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-to-single-sinusoid-regression-to-estimate-the-f0-contour-from-noisy-speech-using-recurrent-deep-neural-networks-1807.00752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-to-single-sinusoid-regression-to-estimate-the-f0-contour-from-noisy-speech-using-recurrent-deep-neural-networks-1807.00752"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-role-of-l1-in-automatic-pronunciation-evaluation-of-l2-speech-1807.01738</loc><lastmod>2018-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-role-of-l1-in-automatic-pronunciation-evaluation-of-l2-speech-1807.01738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-role-of-l1-in-automatic-pronunciation-evaluation-of-l2-speech-1807.01738"/></url>
<url><loc>https://scifaro.com/en/abs/tone-recognition-using-lifters-and-ctc-1807.02465</loc><lastmod>2018-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tone-recognition-using-lifters-and-ctc-1807.02465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tone-recognition-using-lifters-and-ctc-1807.02465"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-synthesis-model-of-sparse-audio-declipper-1807.03612</loc><lastmod>2018-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-synthesis-model-of-sparse-audio-declipper-1807.03612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-synthesis-model-of-sparse-audio-declipper-1807.03612"/></url>
<url><loc>https://scifaro.com/en/abs/rtf-based-binaural-mvdr-beamformer-exploiting-an-external-microphone-in-a-diffuse-noise-field-1807.04096</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rtf-based-binaural-mvdr-beamformer-exploiting-an-external-microphone-in-a-diffuse-noise-field-1807.04096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rtf-based-binaural-mvdr-beamformer-exploiting-an-external-microphone-in-a-diffuse-noise-field-1807.04096"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-keyword-spotting-using-time-delay-neural-networks-1807.04353</loc><lastmod>2018-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-keyword-spotting-using-time-delay-neural-networks-1807.04353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-keyword-spotting-using-time-delay-neural-networks-1807.04353"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-binaural-lcmv-beamforming-in-complex-acoustic-scenarios-theoretical-and-practical-insights-1807.04636</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-binaural-lcmv-beamforming-in-complex-acoustic-scenarios-theoretical-and-practical-insights-1807.04636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-binaural-lcmv-beamforming-in-complex-acoustic-scenarios-theoretical-and-practical-insights-1807.04636"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-ctc-attention-based-end-to-end-speech-recognition-using-subword-units-1807.04978</loc><lastmod>2018-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-ctc-attention-based-end-to-end-speech-recognition-using-subword-units-1807.04978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-ctc-attention-based-end-to-end-speech-recognition-using-subword-units-1807.04978"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-adaptation-techniques-and-recurrent-neural-network-architectures-1807.06441</loc><lastmod>2018-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-adaptation-techniques-and-recurrent-neural-network-architectures-1807.06441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-adaptation-techniques-and-recurrent-neural-network-architectures-1807.06441"/></url>
<url><loc>https://scifaro.com/en/abs/learning-noise-invariant-representations-for-robust-speech-recognition-1807.06610</loc><lastmod>2018-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-noise-invariant-representations-for-robust-speech-recognition-1807.06610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-noise-invariant-representations-for-robust-speech-recognition-1807.06610"/></url>
<url><loc>https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-mce-plan-dataset-and-baseline-system-1807.06663</loc><lastmod>2018-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-mce-plan-dataset-and-baseline-system-1807.06663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-mce-plan-dataset-and-baseline-system-1807.06663"/></url>
<url><loc>https://scifaro.com/en/abs/a-capsule-based-approach-for-polyphonic-sound-event-detection-1807.07436</loc><lastmod>2018-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-capsule-based-approach-for-polyphonic-sound-event-detection-1807.07436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-capsule-based-approach-for-polyphonic-sound-event-detection-1807.07436"/></url>
<url><loc>https://scifaro.com/en/abs/unified-hypersphere-embedding-for-speaker-recognition-1807.08312</loc><lastmod>2018-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-hypersphere-embedding-for-speaker-recognition-1807.08312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-hypersphere-embedding-for-speaker-recognition-1807.08312"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-word-recognition-with-sequence-to-sequence-models-1807.09597</loc><lastmod>2018-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-word-recognition-with-sequence-to-sequence-models-1807.09597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-word-recognition-with-sequence-to-sequence-models-1807.09597"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-device-dataset-for-urban-acoustic-scene-classification-1807.09840</loc><lastmod>2018-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-device-dataset-for-urban-acoustic-scene-classification-1807.09840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-device-dataset-for-urban-acoustic-scene-classification-1807.09840"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-techniques-for-language-model-integration-in-encoder-decoder-speech-recognition-1807.10857</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-techniques-for-language-model-integration-in-encoder-decoder-speech-recognition-1807.10857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-techniques-for-language-model-integration-in-encoder-decoder-speech-recognition-1807.10857"/></url>
<url><loc>https://scifaro.com/en/abs/analysing-shortcomings-of-statistical-parametric-speech-synthesis-1807.10941</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysing-shortcomings-of-statistical-parametric-speech-synthesis-1807.10941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysing-shortcomings-of-statistical-parametric-speech-synthesis-1807.10941"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2018-challenge-task-5-monitoring-of-domestic-activities-based-on-multi-channel-acoustics-1807.11246</loc><lastmod>2018-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2018-challenge-task-5-monitoring-of-domestic-activities-based-on-multi-channel-acoustics-1807.11246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2018-challenge-task-5-monitoring-of-domestic-activities-based-on-multi-channel-acoustics-1807.11246"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-domain-adaptation-by-adversarial-learning-for-robust-speech-recognition-1807.11284</loc><lastmod>2018-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-by-adversarial-learning-for-robust-speech-recognition-1807.11284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-by-adversarial-learning-for-robust-speech-recognition-1807.11284"/></url>
<url><loc>https://scifaro.com/en/abs/deep-encoder-decoder-models-for-unsupervised-learning-of-controllable-speech-synthesis-1807.11470</loc><lastmod>2018-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-encoder-decoder-models-for-unsupervised-learning-of-controllable-speech-synthesis-1807.11470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-encoder-decoder-models-for-unsupervised-learning-of-controllable-speech-synthesis-1807.11470"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-and-bias-codes-for-modeling-speaker-adaptive-dnn-based-speech-synthesis-systems-1807.11632</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-and-bias-codes-for-modeling-speaker-adaptive-dnn-based-speech-synthesis-systems-1807.11632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-and-bias-codes-for-modeling-speaker-adaptive-dnn-based-speech-synthesis-systems-1807.11632"/></url>
<url><loc>https://scifaro.com/en/abs/wasserstein-gan-and-waveform-loss-based-acoustic-model-training-for-multi-speaker-text-to-speech-synthesis-systems-using-a-wavenet-vocoder-1807.11679</loc><lastmod>2018-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wasserstein-gan-and-waveform-loss-based-acoustic-model-training-for-multi-speaker-text-to-speech-synthesis-systems-using-a-wavenet-vocoder-1807.11679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wasserstein-gan-and-waveform-loss-based-acoustic-model-training-for-multi-speaker-text-to-speech-synthesis-systems-using-a-wavenet-vocoder-1807.11679"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-doa-estimation-using-deep-convolutional-networks-trained-with-noise-signals-1807.11722</loc><lastmod>2019-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-doa-estimation-using-deep-convolutional-networks-trained-with-noise-signals-1807.11722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-doa-estimation-using-deep-convolutional-networks-trained-with-noise-signals-1807.11722"/></url>
<url><loc>https://scifaro.com/en/abs/manual-post-editing-of-automatically-transcribed-speeches-from-the-icelandic-parliament-althingi-1807.11893</loc><lastmod>2018-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manual-post-editing-of-automatically-transcribed-speeches-from-the-icelandic-parliament-althingi-1807.11893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manual-post-editing-of-automatically-transcribed-speeches-from-the-icelandic-parliament-althingi-1807.11893"/></url>
<url><loc>https://scifaro.com/en/abs/delay-performance-tradeoffs-in-causal-microphone-array-processing-1808.00082</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delay-performance-tradeoffs-in-causal-microphone-array-processing-1808.00082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delay-performance-tradeoffs-in-causal-microphone-array-processing-1808.00082"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-using-partially-asynchronous-microphone-arrays-without-resampling-1808.00096</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-using-partially-asynchronous-microphone-arrays-without-resampling-1808.00096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-using-partially-asynchronous-microphone-arrays-without-resampling-1808.00096"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-from-raw-waveform-with-sincnet-1808.00158</loc><lastmod>2019-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-from-raw-waveform-with-sincnet-1808.00158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-from-raw-waveform-with-sincnet-1808.00158"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-accuracy-of-pitch-accent-annotations-in-neural-network-based-speech-synthesis-and-denoising-effects-1808.00665</loc><lastmod>2018-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-accuracy-of-pitch-accent-annotations-in-neural-network-based-speech-synthesis-and-denoising-effects-1808.00665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-accuracy-of-pitch-accent-annotations-in-neural-network-based-speech-synthesis-and-denoising-effects-1808.00665"/></url>
<url><loc>https://scifaro.com/en/abs/prosodic-enhanced-siamese-convolutional-neural-networks-for-cross-device-text-independent-speaker-verification-1808.01026</loc><lastmod>2018-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosodic-enhanced-siamese-convolutional-neural-networks-for-cross-device-text-independent-speaker-verification-1808.01026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosodic-enhanced-siamese-convolutional-neural-networks-for-cross-device-text-independent-speaker-verification-1808.01026"/></url>
<url><loc>https://scifaro.com/en/abs/triplet-network-with-attention-for-speaker-diarization-1808.01535</loc><lastmod>2018-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/triplet-network-with-attention-for-speaker-diarization-1808.01535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/triplet-network-with-attention-for-speaker-diarization-1808.01535"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-a-competition-review-1808.02357</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-a-competition-review-1808.02357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-a-competition-review-1808.02357"/></url>
<url><loc>https://scifaro.com/en/abs/deep-context-end-to-end-contextual-speech-recognition-1808.02480</loc><lastmod>2018-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-context-end-to-end-contextual-speech-recognition-1808.02480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-context-end-to-end-contextual-speech-recognition-1808.02480"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-acoustic-scene-classification-1808.05777</loc><lastmod>2018-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-acoustic-scene-classification-1808.05777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-adversarial-domain-adaptation-for-acoustic-scene-classification-1808.05777"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-speech-synthesis-architecture-for-unsupervised-speaker-adaptation-1808.06288</loc><lastmod>2018-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-speech-synthesis-architecture-for-unsupervised-speaker-adaptation-1808.06288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-speech-synthesis-architecture-for-unsupervised-speaker-adaptation-1808.06288"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-speech-enhancement-using-exponent-only-floating-point-quantized-neural-network-eofp-qnn-1808.06474</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-speech-enhancement-using-exponent-only-floating-point-quantized-neural-network-eofp-qnn-1808.06474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-speech-enhancement-using-exponent-only-floating-point-quantized-neural-network-eofp-qnn-1808.06474"/></url>
<url><loc>https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-time-variant-sub-gaussian-source-model-1808.08056</loc><lastmod>2018-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-time-variant-sub-gaussian-source-model-1808.08056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-low-rank-matrix-analysis-based-on-time-variant-sub-gaussian-source-model-1808.08056"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-attention-model-for-speech-command-recognition-1808.08929</loc><lastmod>2018-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-attention-model-for-speech-command-recognition-1808.08929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-attention-model-for-speech-command-recognition-1808.08929"/></url>
<url><loc>https://scifaro.com/en/abs/using-monte-carlo-dropout-for-non-stationary-noise-reduction-from-speech-1808.09432</loc><lastmod>2018-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-monte-carlo-dropout-for-non-stationary-noise-reduction-from-speech-1808.09432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-monte-carlo-dropout-for-non-stationary-noise-reduction-from-speech-1808.09432"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-based-on-cross-domain-features-using-variational-auto-encoders-1808.09634</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-based-on-cross-domain-features-using-variational-auto-encoders-1808.09634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-based-on-cross-domain-features-using-variational-auto-encoders-1808.09634"/></url>
<url><loc>https://scifaro.com/en/abs/replay-spoofing-detection-system-for-automatic-speaker-verification-using-multi-task-learning-of-noise-classes-1808.09638</loc><lastmod>2018-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/replay-spoofing-detection-system-for-automatic-speaker-verification-using-multi-task-learning-of-noise-classes-1808.09638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/replay-spoofing-detection-system-for-automatic-speaker-verification-using-multi-task-learning-of-noise-classes-1808.09638"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-with-adaptive-computation-steps-1808.10088</loc><lastmod>2018-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-adaptive-computation-steps-1808.10088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-adaptive-computation-steps-1808.10088"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-audio-visual-fusion-for-robust-automatic-speech-recognition-1809.01728</loc><lastmod>2019-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-audio-visual-fusion-for-robust-automatic-speech-recognition-1809.01728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-audio-visual-fusion-for-robust-automatic-speech-recognition-1809.01728"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-feature-mapping-for-speech-enhancement-1809.02251</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-feature-mapping-for-speech-enhancement-1809.02251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-feature-mapping-for-speech-enhancement-1809.02251"/></url>
<url><loc>https://scifaro.com/en/abs/cycle-consistent-speech-enhancement-1809.02253</loc><lastmod>2019-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycle-consistent-speech-enhancement-1809.02253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycle-consistent-speech-enhancement-1809.02253"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-language-identification-using-netfv-and-netvlad-1809.02906</loc><lastmod>2018-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-language-identification-using-netfv-and-netvlad-1809.02906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-language-identification-using-netfv-and-netvlad-1809.02906"/></url>
<url><loc>https://scifaro.com/en/abs/dual-label-deep-lstm-dereverberation-for-speaker-verification-1809.03868</loc><lastmod>2018-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-label-deep-lstm-dereverberation-for-speaker-verification-1809.03868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-label-deep-lstm-dereverberation-for-speaker-verification-1809.03868"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-speaker-identification-for-a-service-robot-using-a-cnn-based-generic-verifier-1809.04115</loc><lastmod>2018-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-speaker-identification-for-a-service-robot-using-a-cnn-based-generic-verifier-1809.04115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-speaker-identification-for-a-service-robot-using-a-cnn-based-generic-verifier-1809.04115"/></url>
<url><loc>https://scifaro.com/en/abs/frame-level-speaker-embeddings-for-text-independent-speaker-recognition-and-analysis-of-end-to-end-model-1809.04437</loc><lastmod>2018-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-level-speaker-embeddings-for-text-independent-speaker-recognition-and-analysis-of-end-to-end-model-1809.04437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-level-speaker-embeddings-for-text-independent-speaker-recognition-and-analysis-of-end-to-end-model-1809.04437"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-representation-learning-of-speech-for-dialect-identification-1809.04458</loc><lastmod>2018-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-representation-learning-of-speech-for-dialect-identification-1809.04458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-representation-learning-of-speech-for-dialect-identification-1809.04458"/></url>
<url><loc>https://scifaro.com/en/abs/generative-x-vectors-for-text-independent-speaker-verification-1809.06798</loc><lastmod>2018-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-x-vectors-for-text-independent-speaker-verification-1809.06798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-x-vectors-for-text-independent-speaker-verification-1809.06798"/></url>
<url><loc>https://scifaro.com/en/abs/visual-speech-language-models-1809.06800</loc><lastmod>2018-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-speech-language-models-1809.06800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-speech-language-models-1809.06800"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-multi-accented-lstm-ctc-speech-recognition-using-a-domain-specific-student-teacher-learning-paradigm-1809.06833</loc><lastmod>2019-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-multi-accented-lstm-ctc-speech-recognition-using-a-domain-specific-student-teacher-learning-paradigm-1809.06833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-multi-accented-lstm-ctc-speech-recognition-using-a-domain-specific-student-teacher-learning-paradigm-1809.06833"/></url>
<url><loc>https://scifaro.com/en/abs/new-insights-on-the-optimality-of-parameterized-wiener-filters-for-speech-enhancement-applications-1809.07384</loc><lastmod>2018-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/new-insights-on-the-optimality-of-parameterized-wiener-filters-for-speech-enhancement-applications-1809.07384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/new-insights-on-the-optimality-of-parameterized-wiener-filters-for-speech-enhancement-applications-1809.07384"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-mcc-phat-for-the-locata-challenge-task-1-and-task-3-1809.07549</loc><lastmod>2018-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-mcc-phat-for-the-locata-challenge-task-1-and-task-3-1809.07549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-mcc-phat-for-the-locata-challenge-task-1-and-task-3-1809.07549"/></url>
<url><loc>https://scifaro.com/en/abs/from-audio-to-semantics-approaches-to-end-to-end-spoken-language-understanding-1809.09190</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-audio-to-semantics-approaches-to-end-to-end-spoken-language-understanding-1809.09190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-audio-to-semantics-approaches-to-end-to-end-spoken-language-understanding-1809.09190"/></url>
<url><loc>https://scifaro.com/en/abs/error-reduction-network-for-dblstm-based-voice-conversion-1809.09841</loc><lastmod>2018-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/error-reduction-network-for-dblstm-based-voice-conversion-1809.09841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/error-reduction-network-for-dblstm-based-voice-conversion-1809.09841"/></url>
<url><loc>https://scifaro.com/en/abs/wavecyclegan-synthetic-to-natural-speech-waveform-conversion-using-cycle-consistent-adversarial-networks-1809.10288</loc><lastmod>2018-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavecyclegan-synthetic-to-natural-speech-waveform-conversion-using-cycle-consistent-adversarial-networks-1809.10288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavecyclegan-synthetic-to-natural-speech-waveform-conversion-using-cycle-consistent-adversarial-networks-1809.10288"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-networks-for-supervised-single-channel-speech-separation-1810.02568</loc><lastmod>2018-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-networks-for-supervised-single-channel-speech-separation-1810.02568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-networks-for-supervised-single-channel-speech-separation-1810.02568"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-overlapped-speech-in-meetings-a-multichannel-separation-approach-using-neural-networks-1810.03655</loc><lastmod>2018-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-overlapped-speech-in-meetings-a-multichannel-separation-approach-using-neural-networks-1810.03655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-overlapped-speech-in-meetings-a-multichannel-separation-approach-using-neural-networks-1810.03655"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-neural-networks-and-x-vector-embedding-for-dcase2018-acoustic-scene-classification-challenge-1810.04273</loc><lastmod>2018-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-neural-networks-and-x-vector-embedding-for-dcase2018-acoustic-scene-classification-challenge-1810.04273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-neural-networks-and-x-vector-embedding-for-dcase2018-acoustic-scene-classification-challenge-1810.04273"/></url>
<url><loc>https://scifaro.com/en/abs/fully-supervised-speaker-diarization-1810.04719</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-supervised-speaker-diarization-1810.04719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-supervised-speaker-diarization-1810.04719"/></url>
<url><loc>https://scifaro.com/en/abs/voicefilter-targeted-voice-separation-by-speaker-conditioned-spectrogram-masking-1810.04826</loc><lastmod>2019-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicefilter-targeted-voice-separation-by-speaker-conditioned-spectrogram-masking-1810.04826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicefilter-targeted-voice-separation-by-speaker-conditioned-spectrogram-masking-1810.04826"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-chaotic-uniform-quantizer-for-speech-coding-1810.05260</loc><lastmod>2018-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-chaotic-uniform-quantizer-for-speech-coding-1810.05260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-chaotic-uniform-quantizer-for-speech-coding-1810.05260"/></url>
<url><loc>https://scifaro.com/en/abs/a-fully-time-domain-neural-model-for-subband-based-speech-synthesizer-1810.05319</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fully-time-domain-neural-model-for-subband-based-speech-synthesizer-1810.05319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fully-time-domain-neural-model-for-subband-based-speech-synthesizer-1810.05319"/></url>
<url><loc>https://scifaro.com/en/abs/federated-learning-for-keyword-spotting-1810.05512</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-learning-for-keyword-spotting-1810.05512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-learning-for-keyword-spotting-1810.05512"/></url>
<url><loc>https://scifaro.com/en/abs/robust-joint-estimation-of-multi-microphone-signal-model-parameters-1810.05677</loc><lastmod>2018-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-joint-estimation-of-multi-microphone-signal-model-parameters-1810.05677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-joint-estimation-of-multi-microphone-signal-model-parameters-1810.05677"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-detection-by-using-capsule-neural-networks-1810.06325</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-by-using-capsule-neural-networks-1810.06325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-by-using-capsule-neural-networks-1810.06325"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-nonlinear-audio-effects-with-end-to-end-deep-neural-networks-1810.06603</loc><lastmod>2019-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-nonlinear-audio-effects-with-end-to-end-deep-neural-networks-1810.06603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-nonlinear-audio-effects-with-end-to-end-deep-neural-networks-1810.06603"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-based-i-vector-mapping-for-speaker-verification-using-short-utterances-1810.07309</loc><lastmod>2018-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-based-i-vector-mapping-for-speaker-verification-using-short-utterances-1810.07309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-based-i-vector-mapping-for-speaker-verification-using-short-utterances-1810.07309"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-on-clean-data-for-end-to-end-speech-translation-fbk-iwslt-2018-1810.07652</loc><lastmod>2018-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-on-clean-data-for-end-to-end-speech-translation-fbk-iwslt-2018-1810.07652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-on-clean-data-for-end-to-end-speech-translation-fbk-iwslt-2018-1810.07652"/></url>
<url><loc>https://scifaro.com/en/abs/edgespeechnets-highly-efficient-deep-neural-networks-for-speech-recognition-on-the-edge-1810.08559</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edgespeechnets-highly-efficient-deep-neural-networks-for-speech-recognition-on-the-edge-1810.08559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edgespeechnets-highly-efficient-deep-neural-networks-for-speech-recognition-on-the-edge-1810.08559"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-difference-to-sum-power-ratio-of-speech-and-wind-noise-based-on-the-corcos-model-1810.09708</loc><lastmod>2019-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-difference-to-sum-power-ratio-of-speech-and-wind-noise-based-on-the-corcos-model-1810.09708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-difference-to-sum-power-ratio-of-speech-and-wind-noise-based-on-the-corcos-model-1810.09708"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-selective-beamformer-with-keyword-mask-estimation-1810.10727</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-selective-beamformer-with-keyword-mask-estimation-1810.10727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-selective-beamformer-with-keyword-mask-estimation-1810.10727"/></url>
<url><loc>https://scifaro.com/en/abs/short-utterance-compensation-in-speaker-verification-via-cosine-based-teacher-student-learning-of-speaker-embeddings-1810.10884</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/short-utterance-compensation-in-speaker-verification-via-cosine-based-teacher-student-learning-of-speaker-embeddings-1810.10884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/short-utterance-compensation-in-speaker-verification-via-cosine-based-teacher-student-learning-of-speaker-embeddings-1810.10884"/></url>
<url><loc>https://scifaro.com/en/abs/concatenated-identical-dnn-ci-dnn-to-reduce-noise-type-dependence-in-dnn-based-speech-enhancement-1810.11217</loc><lastmod>2018-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concatenated-identical-dnn-ci-dnn-to-reduce-noise-type-dependence-in-dnn-based-speech-enhancement-1810.11217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concatenated-identical-dnn-ci-dnn-to-reduce-noise-type-dependence-in-dnn-based-speech-enhancement-1810.11217"/></url>
<url><loc>https://scifaro.com/en/abs/gpurir-a-python-library-for-room-impulse-response-simulation-with-gpu-acceleration-1810.11359</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gpurir-a-python-library-for-room-impulse-response-simulation-with-gpu-acceleration-1810.11359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gpurir-a-python-library-for-room-impulse-response-simulation-with-gpu-acceleration-1810.11359"/></url>
<url><loc>https://scifaro.com/en/abs/lpcnet-improving-neural-speech-synthesis-through-linear-prediction-1810.11846</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lpcnet-improving-neural-speech-synthesis-through-linear-prediction-1810.11846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lpcnet-improving-neural-speech-synthesis-through-linear-prediction-1810.11846"/></url>
<url><loc>https://scifaro.com/en/abs/stft-spectral-loss-for-training-a-neural-speech-waveform-model-1810.11945</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stft-spectral-loss-for-training-a-neural-speech-waveform-model-1810.11945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stft-spectral-loss-for-training-a-neural-speech-waveform-model-1810.11945"/></url>
<url><loc>https://scifaro.com/en/abs/neural-source-filter-based-waveform-model-for-statistical-parametric-speech-synthesis-1810.11946</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-source-filter-based-waveform-model-for-statistical-parametric-speech-synthesis-1810.11946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-source-filter-based-waveform-model-for-statistical-parametric-speech-synthesis-1810.11946"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-enhanced-tacotron-text-to-speech-synthesis-systems-with-self-attention-for-pitch-accent-language-1810.11960</loc><lastmod>2019-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-enhanced-tacotron-text-to-speech-synthesis-systems-with-self-attention-for-pitch-accent-language-1810.11960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-enhanced-tacotron-text-to-speech-synthesis-systems-with-self-attention-for-pitch-accent-language-1810.11960"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-cnn-resbilstm-ctc-an-end-to-end-acoustic-model-for-speech-recognition-1810.12001</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-cnn-resbilstm-ctc-an-end-to-end-acoustic-model-for-speech-recognition-1810.12001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-cnn-resbilstm-ctc-an-end-to-end-acoustic-model-for-speech-recognition-1810.12001"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-speech-recognition-with-difficult-negative-training-examples-1810.12170</loc><lastmod>2018-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-speech-recognition-with-difficult-negative-training-examples-1810.12170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-speech-recognition-with-difficult-negative-training-examples-1810.12170"/></url>
<url><loc>https://scifaro.com/en/abs/a-proper-version-of-synthesis-based-sparse-audio-declipper-1810.12204</loc><lastmod>2020-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-proper-version-of-synthesis-based-sparse-audio-declipper-1810.12204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-proper-version-of-synthesis-based-sparse-audio-declipper-1810.12204"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-generation-for-text-to-speech-synthesis-using-pitch-synchronous-multi-scale-generative-adversarial-networks-1810.12598</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-generation-for-text-to-speech-synthesis-using-pitch-synchronous-multi-scale-generative-adversarial-networks-1810.12598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-generation-for-text-to-speech-synthesis-using-pitch-synchronous-multi-scale-generative-adversarial-networks-1810.12598"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-networks-for-unpaired-voice-transformation-on-impaired-speech-1810.12656</loc><lastmod>2019-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-networks-for-unpaired-voice-transformation-on-impaired-speech-1810.12656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-networks-for-unpaired-voice-transformation-on-impaired-speech-1810.12656"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-gaussian-process-audio-source-separation-using-spectrum-priors-in-the-time-domain-1810.12679</loc><lastmod>2018-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-gaussian-process-audio-source-separation-using-spectrum-priors-in-the-time-domain-1810.12679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-gaussian-process-audio-source-separation-using-spectrum-priors-in-the-time-domain-1810.12679"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-speaker-conversion-jointly-and-simultaneously-transforming-facial-expression-and-acoustic-characteristics-1810.12730</loc><lastmod>2018-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-speaker-conversion-jointly-and-simultaneously-transforming-facial-expression-and-acoustic-characteristics-1810.12730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-speaker-conversion-jointly-and-simultaneously-transforming-facial-expression-and-acoustic-characteristics-1810.12730"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-speech-enhancement-in-unseen-environments-with-noise-embeddings-1810.12757</loc><lastmod>2018-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-speech-enhancement-in-unseen-environments-with-noise-embeddings-1810.12757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-speech-enhancement-in-unseen-environments-with-noise-embeddings-1810.12757"/></url>
<url><loc>https://scifaro.com/en/abs/a-streamlined-encoder-decoder-architecture-for-melody-extraction-1810.12947</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-streamlined-encoder-decoder-architecture-for-melody-extraction-1810.12947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-streamlined-encoder-decoder-architecture-for-melody-extraction-1810.12947"/></url>
<url><loc>https://scifaro.com/en/abs/bi-directional-lattice-recurrent-neural-networks-for-confidence-estimation-1810.13024</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bi-directional-lattice-recurrent-neural-networks-for-confidence-estimation-1810.13024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bi-directional-lattice-recurrent-neural-networks-for-confidence-estimation-1810.13024"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-estimation-and-deletion-prediction-using-bidirectional-recurrent-neural-networks-1810.13025</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-estimation-and-deletion-prediction-using-bidirectional-recurrent-neural-networks-1810.13025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-estimation-and-deletion-prediction-using-bidirectional-recurrent-neural-networks-1810.13025"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-filtering-networks-for-audio-replay-attack-detection-1810.13048</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-filtering-networks-for-audio-replay-attack-detection-1810.13048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-filtering-networks-for-audio-replay-attack-detection-1810.13048"/></url>
<url><loc>https://scifaro.com/en/abs/latent-variable-approach-to-diarization-of-audio-recordings-using-ad-hoc-randomly-placed-mobile-devices-1810.13109</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-variable-approach-to-diarization-of-audio-recordings-using-ad-hoc-randomly-placed-mobile-devices-1810.13109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-variable-approach-to-diarization-of-audio-recordings-using-ad-hoc-randomly-placed-mobile-devices-1810.13109"/></url>
<url><loc>https://scifaro.com/en/abs/discriminatively-re-trained-i-vector-extractor-for-speaker-recognition-1810.13183</loc><lastmod>2018-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminatively-re-trained-i-vector-extractor-for-speaker-recognition-1810.13183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminatively-re-trained-i-vector-extractor-for-speaker-recognition-1810.13183"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-inductive-bias-of-words-in-acoustics-to-word-models-1810.13407</loc><lastmod>2018-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-inductive-bias-of-words-in-acoustics-to-word-models-1810.13407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-inductive-bias-of-words-in-acoustics-to-word-models-1810.13407"/></url>
<url><loc>https://scifaro.com/en/abs/low-dimensional-bottleneck-features-for-on-device-continuous-speech-recognition-1811.00006</loc><lastmod>2018-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-dimensional-bottleneck-features-for-on-device-continuous-speech-recognition-1811.00006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-dimensional-bottleneck-features-for-on-device-continuous-speech-recognition-1811.00006"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-tube-amplifier-emulation-1811.00334</loc><lastmod>2019-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-tube-amplifier-emulation-1811.00334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-tube-amplifier-emulation-1811.00334"/></url>
<url><loc>https://scifaro.com/en/abs/deep-segment-attentive-embedding-for-duration-robust-speaker-verification-1811.00883</loc><lastmod>2018-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-segment-attentive-embedding-for-duration-robust-speaker-verification-1811.00883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-segment-attentive-embedding-for-duration-robust-speaker-verification-1811.00883"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-target-linearly-constrained-minimum-variance-beamformer-with-spatial-cues-preservation-for-binaural-hearing-aids-1811.01133</loc><lastmod>2019-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-target-linearly-constrained-minimum-variance-beamformer-with-spatial-cues-preservation-for-binaural-hearing-aids-1811.01133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-target-linearly-constrained-minimum-variance-beamformer-with-spatial-cues-preservation-for-binaural-hearing-aids-1811.01133"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-audio-features-for-speech-music-classification-1811.01222</loc><lastmod>2018-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-audio-features-for-speech-music-classification-1811.01222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-audio-features-for-speech-music-classification-1811.01222"/></url>
<url><loc>https://scifaro.com/en/abs/manner-of-articulation-detection-using-connectionist-temporal-classification-to-improve-automatic-speech-recognition-performance-1811.01644</loc><lastmod>2018-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manner-of-articulation-detection-using-connectionist-temporal-classification-to-improve-automatic-speech-recognition-performance-1811.01644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manner-of-articulation-detection-using-connectionist-temporal-classification-to-improve-automatic-speech-recognition-performance-1811.01644"/></url>
<url><loc>https://scifaro.com/en/abs/when-ctc-training-meets-acoustic-landmarks-1811.02063</loc><lastmod>2019-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/when-ctc-training-meets-acoustic-landmarks-1811.02063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/when-ctc-training-meets-acoustic-landmarks-1811.02063"/></url>
<url><loc>https://scifaro.com/en/abs/language-model-integration-based-on-memory-control-for-sequence-to-sequence-speech-recognition-1811.02162</loc><lastmod>2025-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-model-integration-based-on-memory-control-for-sequence-to-sequence-speech-recognition-1811.02162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-model-integration-based-on-memory-control-for-sequence-to-sequence-speech-recognition-1811.02162"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-using-end-to-end-adversarial-language-adaptation-1811.02331</loc><lastmod>2018-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-using-end-to-end-adversarial-language-adaptation-1811.02331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-using-end-to-end-adversarial-language-adaptation-1811.02331"/></url>
<url><loc>https://scifaro.com/en/abs/trainable-adaptive-window-switching-for-speech-enhancement-1811.02438</loc><lastmod>2019-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trainable-adaptive-window-switching-for-speech-enhancement-1811.02438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trainable-adaptive-window-switching-for-speech-enhancement-1811.02438"/></url>
<url><loc>https://scifaro.com/en/abs/bidirectional-quaternion-long-short-term-memory-recurrent-neural-networks-for-speech-recognition-1811.02566</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bidirectional-quaternion-long-short-term-memory-recurrent-neural-networks-for-speech-recognition-1811.02566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bidirectional-quaternion-long-short-term-memory-recurrent-neural-networks-for-speech-recognition-1811.02566"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-based-multichannel-end-to-end-speech-recognition-for-everyday-home-environments-1811.02735</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-based-multichannel-end-to-end-speech-recognition-for-everyday-home-environments-1811.02735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-based-multichannel-end-to-end-speech-recognition-for-everyday-home-environments-1811.02735"/></url>
<url><loc>https://scifaro.com/en/abs/learning-acoustic-word-embeddings-with-phonetically-associated-triplet-network-1811.02736</loc><lastmod>2018-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-acoustic-word-embeddings-with-phonetically-associated-triplet-network-1811.02736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-acoustic-word-embeddings-with-phonetically-associated-triplet-network-1811.02736"/></url>
<url><loc>https://scifaro.com/en/abs/promising-accurate-prefix-boosting-for-sequence-to-sequence-asr-1811.02770</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/promising-accurate-prefix-boosting-for-sequence-to-sequence-asr-1811.02770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/promising-accurate-prefix-boosting-for-sequence-to-sequence-asr-1811.02770"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-dnn-autoencoder-for-robust-speaker-recognition-1811.02938</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-dnn-autoencoder-for-robust-speaker-recognition-1811.02938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-dnn-autoencoder-for-robust-speaker-recognition-1811.02938"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-speech-coding-with-samplernn-1811.03021</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-speech-coding-with-samplernn-1811.03021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-speech-coding-with-samplernn-1811.03021"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-end-to-end-neural-speaker-verification-to-new-languages-and-recording-conditions-with-adversarial-training-1811.03055</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-end-to-end-neural-speaker-verification-to-new-languages-and-recording-conditions-with-adversarial-training-1811.03055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-end-to-end-neural-speaker-verification-to-new-languages-and-recording-conditions-with-adversarial-training-1811.03055"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-speaker-embedding-networks-for-domain-robust-end-to-end-speaker-verification-1811.03063</loc><lastmod>2018-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-speaker-embedding-networks-for-domain-robust-end-to-end-speaker-verification-1811.03063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-speaker-embedding-networks-for-domain-robust-end-to-end-speaker-verification-1811.03063"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-attention-scoring-for-deep-speaker-features-in-speaker-verification-1811.03255</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-attention-scoring-for-deep-speaker-features-in-speaker-verification-1811.03255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-attention-scoring-for-deep-speaker-features-in-speaker-verification-1811.03255"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-constrained-training-for-speaker-verification-1811.03258</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-constrained-training-for-speaker-verification-1811.03258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-constrained-training-for-speaker-verification-1811.03258"/></url>
<url><loc>https://scifaro.com/en/abs/who-do-i-sound-like-showcasing-speaker-recognition-technology-by-youtube-voice-search-1811.03293</loc><lastmod>2019-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-do-i-sound-like-showcasing-speaker-recognition-technology-by-youtube-voice-search-1811.03293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-do-i-sound-like-showcasing-speaker-recognition-technology-by-youtube-voice-search-1811.03293"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptive-neural-vocoders-for-parametric-speech-synthesis-systems-1811.03311</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptive-neural-vocoders-for-parametric-speech-synthesis-systems-1811.03311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptive-neural-vocoders-for-parametric-speech-synthesis-systems-1811.03311"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-multilingual-sequence-to-sequence-speech-recognition-systems-1811.03451</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-multilingual-sequence-to-sequence-speech-recognition-systems-1811.03451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-multilingual-sequence-to-sequence-speech-recognition-systems-1811.03451"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-based-on-reducing-the-detail-portion-of-speech-spectrograms-in-modulation-domain-via-discrete-wavelet-transform-1811.03486</loc><lastmod>2018-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-based-on-reducing-the-detail-portion-of-speech-spectrograms-in-modulation-domain-via-discrete-wavelet-transform-1811.03486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-based-on-reducing-the-detail-portion-of-speech-spectrograms-in-modulation-domain-via-discrete-wavelet-transform-1811.03486"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-use-speaker-recognition-technology-to-attack-itself-enhancing-mimicry-attacks-using-automatic-target-speaker-selection-1811.03790</loc><lastmod>2018-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-use-speaker-recognition-technology-to-attack-itself-enhancing-mimicry-attacks-using-automatic-target-speaker-selection-1811.03790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-use-speaker-recognition-technology-to-attack-itself-enhancing-mimicry-attacks-using-automatic-target-speaker-selection-1811.03790"/></url>
<url><loc>https://scifaro.com/en/abs/joint-acoustic-and-class-inference-for-weakly-supervised-sound-event-detection-1811.04048</loc><lastmod>2018-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-acoustic-and-class-inference-for-weakly-supervised-sound-event-detection-1811.04048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-acoustic-and-class-inference-for-weakly-supervised-sound-event-detection-1811.04048"/></url>
<url><loc>https://scifaro.com/en/abs/atts2s-vc-sequence-to-sequence-voice-conversion-with-attention-and-context-preservation-mechanisms-1811.04076</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atts2s-vc-sequence-to-sequence-voice-conversion-with-attention-and-context-preservation-mechanisms-1811.04076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atts2s-vc-sequence-to-sequence-voice-conversion-with-attention-and-context-preservation-mechanisms-1811.04076"/></url>
<url><loc>https://scifaro.com/en/abs/reinforcement-learning-based-speech-enhancement-for-robust-speech-recognition-1811.04224</loc><lastmod>2018-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reinforcement-learning-based-speech-enhancement-for-robust-speech-recognition-1811.04224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reinforcement-learning-based-speech-enhancement-for-robust-speech-recognition-1811.04224"/></url>
<url><loc>https://scifaro.com/en/abs/excitnet-vocoder-a-neural-excitation-model-for-parametric-speech-synthesis-systems-1811.04769</loc><lastmod>2019-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/excitnet-vocoder-a-neural-excitation-model-for-parametric-speech-synthesis-systems-1811.04769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/excitnet-vocoder-a-neural-excitation-model-for-parametric-speech-synthesis-systems-1811.04769"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-and-unsupervised-subword-modeling-for-zero-resource-languages-1811.04791</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-and-unsupervised-subword-modeling-for-zero-resource-languages-1811.04791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-and-unsupervised-subword-modeling-for-zero-resource-languages-1811.04791"/></url>
<url><loc>https://scifaro.com/en/abs/speech-coding-speech-interfaces-and-iot-opportunities-and-challenges-1811.05720</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-coding-speech-interfaces-and-iot-opportunities-and-challenges-1811.05720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-coding-speech-interfaces-and-iot-opportunities-and-challenges-1811.05720"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-approach-towards-emotion-recognition-of-music-using-audio-and-lyrical-content-1811.05760</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-approach-towards-emotion-recognition-of-music-using-audio-and-lyrical-content-1811.05760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-approach-towards-emotion-recognition-of-music-using-audio-and-lyrical-content-1811.05760"/></url>
<url><loc>https://scifaro.com/en/abs/open-source-platforms-for-fast-room-acoustic-simulations-in-complex-structures-1811.05784</loc><lastmod>2018-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-source-platforms-for-fast-room-acoustic-simulations-in-complex-structures-1811.05784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-source-platforms-for-fast-room-acoustic-simulations-in-complex-structures-1811.05784"/></url>
<url><loc>https://scifaro.com/en/abs/on-training-targets-and-objective-functions-for-deep-learning-based-audio-visual-speech-enhancement-1811.06234</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-training-targets-and-objective-functions-for-deep-learning-based-audio-visual-speech-enhancement-1811.06234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-training-targets-and-objective-functions-for-deep-learning-based-audio-visual-speech-enhancement-1811.06234"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-lombard-reflex-on-the-performance-of-deep-learning-based-audio-visual-speech-enhancement-systems-1811.06250</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-lombard-reflex-on-the-performance-of-deep-learning-based-audio-visual-speech-enhancement-systems-1811.06250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-lombard-reflex-on-the-performance-of-deep-learning-based-audio-visual-speech-enhancement-systems-1811.06250"/></url>
<url><loc>https://scifaro.com/en/abs/towards-achieving-robust-universal-neural-vocoding-1811.06292</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-achieving-robust-universal-neural-vocoding-1811.06292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-achieving-robust-universal-neural-vocoding-1811.06292"/></url>
<url><loc>https://scifaro.com/en/abs/comprehensive-evaluation-of-statistical-speech-waveform-synthesis-1811.06296</loc><lastmod>2018-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comprehensive-evaluation-of-statistical-speech-waveform-synthesis-1811.06296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comprehensive-evaluation-of-statistical-speech-waveform-synthesis-1811.06296"/></url>
<url><loc>https://scifaro.com/en/abs/hcu400-an-annotated-dataset-for-exploring-aural-phenomenology-through-causal-uncertainty-1811.06439</loc><lastmod>2019-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hcu400-an-annotated-dataset-for-exploring-aural-phenomenology-through-causal-uncertainty-1811.06439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hcu400-an-annotated-dataset-for-exploring-aural-phenomenology-through-causal-uncertainty-1811.06439"/></url>
<url><loc>https://scifaro.com/en/abs/building-and-evaluation-of-a-real-room-impulse-response-dataset-1811.06795</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-and-evaluation-of-a-real-room-impulse-response-dataset-1811.06795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-and-evaluation-of-a-real-room-impulse-response-dataset-1811.06795"/></url>
<url><loc>https://scifaro.com/en/abs/soundsignaling-realtime-stylistic-modification-of-a-personal-music-corpus-for-information-delivery-1811.06859</loc><lastmod>2018-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundsignaling-realtime-stylistic-modification-of-a-personal-music-corpus-for-information-delivery-1811.06859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundsignaling-realtime-stylistic-modification-of-a-personal-music-corpus-for-information-delivery-1811.06859"/></url>
<url><loc>https://scifaro.com/en/abs/multipath-enabled-private-audio-with-noise-1811.07065</loc><lastmod>2019-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multipath-enabled-private-audio-with-noise-1811.07065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multipath-enabled-private-audio-with-noise-1811.07065"/></url>
<url><loc>https://scifaro.com/en/abs/the-pytorch-kaldi-speech-recognition-toolkit-1811.07453</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-pytorch-kaldi-speech-recognition-toolkit-1811.07453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-pytorch-kaldi-speech-recognition-toolkit-1811.07453"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-dnn-speech-signal-enhancement-for-robust-speaker-recognition-1811.07629</loc><lastmod>2018-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-dnn-speech-signal-enhancement-for-robust-speaker-recognition-1811.07629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-dnn-speech-signal-enhancement-for-robust-speaker-recognition-1811.07629"/></url>
<url><loc>https://scifaro.com/en/abs/learning-robust-heterogeneous-signal-features-from-parallel-neural-network-for-audio-sentiment-analysis-1811.08065</loc><lastmod>2019-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-robust-heterogeneous-signal-features-from-parallel-neural-network-for-audio-sentiment-analysis-1811.08065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-robust-heterogeneous-signal-features-from-parallel-neural-network-for-audio-sentiment-analysis-1811.08065"/></url>
<url><loc>https://scifaro.com/en/abs/feature-exploration-for-almost-zero-resource-asr-free-keyword-spotting-using-a-multilingual-bottleneck-extractor-and-correspondence-autoencoders-1811.08284</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-exploration-for-almost-zero-resource-asr-free-keyword-spotting-using-a-multilingual-bottleneck-extractor-and-correspondence-autoencoders-1811.08284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-exploration-for-almost-zero-resource-asr-free-keyword-spotting-using-a-multilingual-bottleneck-extractor-and-correspondence-autoencoders-1811.08284"/></url>
<url><loc>https://scifaro.com/en/abs/proceedings-of-the-locata-challenge-workshop-a-satellite-event-of-iwaenc-2018-1811.08482</loc><lastmod>2019-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proceedings-of-the-locata-challenge-workshop-a-satellite-event-of-iwaenc-2018-1811.08482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proceedings-of-the-locata-challenge-workshop-a-satellite-event-of-iwaenc-2018-1811.08482"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-aggregation-of-phase-information-for-reducing-computational-cost-of-cnn-based-doa-estimation-1811.08552</loc><lastmod>2019-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-aggregation-of-phase-information-for-reducing-computational-cost-of-cnn-based-doa-estimation-1811.08552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-aggregation-of-phase-information-for-reducing-computational-cost-of-cnn-based-doa-estimation-1811.08552"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-language-and-classifier-independent-feature-analysis-for-vocal-emotion-recognition-1811.08935</loc><lastmod>2018-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-language-and-classifier-independent-feature-analysis-for-vocal-emotion-recognition-1811.08935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-language-and-classifier-independent-feature-analysis-for-vocal-emotion-recognition-1811.08935"/></url>
<url><loc>https://scifaro.com/en/abs/bytes-are-all-you-need-end-to-end-multilingual-speech-recognition-and-synthesis-with-bytes-1811.09021</loc><lastmod>2018-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bytes-are-all-you-need-end-to-end-multilingual-speech-recognition-and-synthesis-with-bytes-1811.09021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bytes-are-all-you-need-end-to-end-multilingual-speech-recognition-and-synthesis-with-bytes-1811.09021"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-with-quaternion-neural-networks-1811.09678</loc><lastmod>2018-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-with-quaternion-neural-networks-1811.09678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-with-quaternion-neural-networks-1811.09678"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-convolutional-filters-with-sincnet-1811.09725</loc><lastmod>2019-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-convolutional-filters-with-sincnet-1811.09725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-convolutional-filters-with-sincnet-1811.09725"/></url>
<url><loc>https://scifaro.com/en/abs/a-method-for-analysis-of-patient-speech-in-dialogue-for-dementia-detection-1811.09919</loc><lastmod>2018-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-method-for-analysis-of-patient-speech-in-dialogue-for-dementia-detection-1811.09919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-method-for-analysis-of-patient-speech-in-dialogue-for-dementia-detection-1811.09919"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-speaker-retrieval-on-random-speaker-variability-subspace-1811.10812</loc><lastmod>2019-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-speaker-retrieval-on-random-speaker-variability-subspace-1811.10812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-speaker-retrieval-on-random-speaker-variability-subspace-1811.10812"/></url>
<url><loc>https://scifaro.com/en/abs/refined-wavenet-vocoder-for-variational-autoencoder-based-voice-conversion-1811.11078</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refined-wavenet-vocoder-for-variational-autoencoder-based-voice-conversion-1811.11078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refined-wavenet-vocoder-for-variational-autoencoder-based-voice-conversion-1811.11078"/></url>
<url><loc>https://scifaro.com/en/abs/acoustics-guided-evaluation-age-a-new-measure-for-estimating-performance-of-speech-enhancement-algorithms-for-robust-asr-1811.11517</loc><lastmod>2018-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustics-guided-evaluation-age-a-new-measure-for-estimating-performance-of-speech-enhancement-algorithms-for-robust-asr-1811.11517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustics-guided-evaluation-age-a-new-measure-for-estimating-performance-of-speech-enhancement-algorithms-for-robust-asr-1811.11517"/></url>
<url><loc>https://scifaro.com/en/abs/svd-phat-a-fast-sound-source-localization-method-1811.11785</loc><lastmod>2019-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svd-phat-a-fast-sound-source-localization-method-1811.11785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svd-phat-a-fast-sound-source-localization-method-1811.11785"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-the-complexity-and-accuracy-of-direction-of-arrival-estimation-methods-based-on-gcc-phat-for-a-pair-of-close-microphones-1811.11787</loc><lastmod>2018-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-the-complexity-and-accuracy-of-direction-of-arrival-estimation-methods-based-on-gcc-phat-for-a-pair-of-close-microphones-1811.11787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-the-complexity-and-accuracy-of-direction-of-arrival-estimation-methods-based-on-gcc-phat-for-a-pair-of-close-microphones-1811.11787"/></url>
<url><loc>https://scifaro.com/en/abs/lp-wavenet-linear-prediction-based-wavenet-speech-synthesis-1811.11913</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lp-wavenet-linear-prediction-based-wavenet-speech-synthesis-1811.11913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lp-wavenet-linear-prediction-based-wavenet-speech-synthesis-1811.11913"/></url>
<url><loc>https://scifaro.com/en/abs/tuplemax-loss-for-language-identification-1811.12290</loc><lastmod>2019-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tuplemax-loss-for-language-identification-1811.12290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tuplemax-loss-for-language-identification-1811.12290"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-and-optimized-sound-source-localization-and-tracking-methods-for-open-and-closed-microphone-array-configurations-1812.00115</loc><lastmod>2018-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-and-optimized-sound-source-localization-and-tracking-methods-for-open-and-closed-microphone-array-configurations-1812.00115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-and-optimized-sound-source-localization-and-tracking-methods-for-open-and-closed-microphone-array-configurations-1812.00115"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speaker-representations-with-mutual-information-1812.00271</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speaker-representations-with-mutual-information-1812.00271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speaker-representations-with-mutual-information-1812.00271"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-based-ae-dnn-constraint-for-better-late-reverb-suppression-in-multi-channel-lp-formulation-1812.01346</loc><lastmod>2018-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-based-ae-dnn-constraint-for-better-late-reverb-suppression-in-multi-channel-lp-formulation-1812.01346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-based-ae-dnn-constraint-for-better-late-reverb-suppression-in-multi-channel-lp-formulation-1812.01346"/></url>
<url><loc>https://scifaro.com/en/abs/domain-attentive-fusion-for-end-to-end-dialect-identification-with-unknown-target-domain-1812.01501</loc><lastmod>2019-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-attentive-fusion-for-end-to-end-dialect-identification-with-unknown-target-domain-1812.01501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-attentive-fusion-for-end-to-end-dialect-identification-with-unknown-target-domain-1812.01501"/></url>
<url><loc>https://scifaro.com/en/abs/feature-extraction-for-temporal-signal-recognition-an-overview-1812.01780</loc><lastmod>2018-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-extraction-for-temporal-signal-recognition-an-overview-1812.01780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-extraction-for-temporal-signal-recognition-an-overview-1812.01780"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-network-based-speaker-adaptation-for-high-fidelity-wavenet-vocoder-1812.02339</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-network-based-speaker-adaptation-for-high-fidelity-wavenet-vocoder-1812.02339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-network-based-speaker-adaptation-for-high-fidelity-wavenet-vocoder-1812.02339"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-synchronous-dct-features-a-pilot-study-on-speaker-identification-1812.02447</loc><lastmod>2018-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-synchronous-dct-features-a-pilot-study-on-speaker-identification-1812.02447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-synchronous-dct-features-a-pilot-study-on-speaker-identification-1812.02447"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-tracking-lms-and-rls-applied-to-speech-formant-estimation-2000-1812.02705</loc><lastmod>2018-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-tracking-lms-and-rls-applied-to-speech-formant-estimation-2000-1812.02705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-tracking-lms-and-rls-applied-to-speech-formant-estimation-2000-1812.02705"/></url>
<url><loc>https://scifaro.com/en/abs/examples-of-usage-of-nearfield-acoustic-holography-methods-for-far-field-estimations-part-1-cw-signals-1812.03826</loc><lastmod>2018-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/examples-of-usage-of-nearfield-acoustic-holography-methods-for-far-field-estimations-part-1-cw-signals-1812.03826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/examples-of-usage-of-nearfield-acoustic-holography-methods-for-far-field-estimations-part-1-cw-signals-1812.03826"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-by-backtranslation-for-end-to-end-asr-in-low-resource-settings-1812.03919</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-by-backtranslation-for-end-to-end-asr-in-low-resource-settings-1812.03919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-by-backtranslation-for-end-to-end-asr-in-low-resource-settings-1812.03919"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2018-challenge-solution-for-task-5-1812.04618</loc><lastmod>2018-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2018-challenge-solution-for-task-5-1812.04618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2018-challenge-solution-for-task-5-1812.04618"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-multi-speaker-latent-space-to-improve-neural-tts-quick-enrolling-new-speaker-and-enhancing-premium-voice-1812.05253</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-multi-speaker-latent-space-to-improve-neural-tts-quick-enrolling-new-speaker-and-enhancing-premium-voice-1812.05253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-multi-speaker-latent-space-to-improve-neural-tts-quick-enrolling-new-speaker-and-enhancing-premium-voice-1812.05253"/></url>
<url><loc>https://scifaro.com/en/abs/fpets-fully-parallel-end-to-end-text-to-speech-system-1812.05710</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fpets-fully-parallel-end-to-end-text-to-speech-system-1812.05710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fpets-fully-parallel-end-to-end-text-to-speech-system-1812.05710"/></url>
<url><loc>https://scifaro.com/en/abs/speech-and-speaker-recognition-from-raw-waveform-with-sincnet-1812.05920</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-and-speaker-recognition-from-raw-waveform-with-sincnet-1812.05920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-and-speaker-recognition-from-raw-waveform-with-sincnet-1812.05920"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-layered-energy-consumption-model-for-smart-wireless-acoustic-sensor-networks-1812.06672</loc><lastmod>2018-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-layered-energy-consumption-model-for-smart-wireless-acoustic-sensor-networks-1812.06672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-layered-energy-consumption-model-for-smart-wireless-acoustic-sensor-networks-1812.06672"/></url>
<url><loc>https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-detection-and-localization-of-3d-sound-events-1812.06811</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-detection-and-localization-of-3d-sound-events-1812.06811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quaternion-convolutional-neural-networks-for-detection-and-localization-of-3d-sound-events-1812.06811"/></url>
<url><loc>https://scifaro.com/en/abs/pathological-voice-classification-using-mel-cepstrum-vectors-and-support-vector-machine-1812.07729</loc><lastmod>2018-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pathological-voice-classification-using-mel-cepstrum-vectors-and-support-vector-machine-1812.07729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pathological-voice-classification-using-mel-cepstrum-vectors-and-support-vector-machine-1812.07729"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1812.08400</loc><lastmod>2019-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1812.08400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1812.08400"/></url>
<url><loc>https://scifaro.com/en/abs/fr-echet-audio-distance-a-metric-for-evaluating-music-enhancement-algorithms-1812.08466</loc><lastmod>2019-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fr-echet-audio-distance-a-metric-for-evaluating-music-enhancement-algorithms-1812.08466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fr-echet-audio-distance-a-metric-for-evaluating-music-enhancement-algorithms-1812.08466"/></url>
<url><loc>https://scifaro.com/en/abs/the-recognition-of-persian-phonemes-using-ppnet-1812.08600</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-recognition-of-persian-phonemes-using-ppnet-1812.08600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-recognition-of-persian-phonemes-using-ppnet-1812.08600"/></url>
<url><loc>https://scifaro.com/en/abs/multi-domain-processing-via-hybrid-denoising-networks-for-speech-enhancement-1812.08914</loc><lastmod>2018-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-domain-processing-via-hybrid-denoising-networks-for-speech-enhancement-1812.08914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-domain-processing-via-hybrid-denoising-networks-for-speech-enhancement-1812.08914"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-recognition-via-segmental-empirical-output-distribution-matching-1812.09323</loc><lastmod>2018-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-recognition-via-segmental-empirical-output-distribution-matching-1812.09323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-recognition-via-segmental-empirical-output-distribution-matching-1812.09323"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-classification-of-reverberant-rooms-using-dnns-1812.09324</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-classification-of-reverberant-rooms-using-dnns-1812.09324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-classification-of-reverberant-rooms-using-dnns-1812.09324"/></url>
<url><loc>https://scifaro.com/en/abs/pansori-asr-corpus-generation-from-open-online-video-contents-1812.09798</loc><lastmod>2018-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pansori-asr-corpus-generation-from-open-online-video-contents-1812.09798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pansori-asr-corpus-generation-from-open-online-video-contents-1812.09798"/></url>
<url><loc>https://scifaro.com/en/abs/tied-hidden-factors-in-neural-networks-for-end-to-end-speaker-recognition-1812.11946</loc><lastmod>2019-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tied-hidden-factors-in-neural-networks-for-end-to-end-speaker-recognition-1812.11946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tied-hidden-factors-in-neural-networks-for-end-to-end-speaker-recognition-1812.11946"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noise-robustness-of-automatic-speech-recognition-via-parallel-data-and-teacher-student-learning-1901.02348</loc><lastmod>2019-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noise-robustness-of-automatic-speech-recognition-via-parallel-data-and-teacher-student-learning-1901.02348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noise-robustness-of-automatic-speech-recognition-via-parallel-data-and-teacher-student-learning-1901.02348"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-of-room-classifiers-using-generative-adversarial-networks-1901.03257</loc><lastmod>2020-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-of-room-classifiers-using-generative-adversarial-networks-1901.03257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-of-room-classifiers-using-generative-adversarial-networks-1901.03257"/></url>
<url><loc>https://scifaro.com/en/abs/orthonormal-embedding-based-deep-clustering-for-single-channel-speech-separation-1901.04690</loc><lastmod>2019-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/orthonormal-embedding-based-deep-clustering-for-single-channel-speech-separation-1901.04690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/orthonormal-embedding-based-deep-clustering-for-single-channel-speech-separation-1901.04690"/></url>
<url><loc>https://scifaro.com/en/abs/a-linear-programming-approach-to-the-tracking-of-partials-1901.05044</loc><lastmod>2019-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-linear-programming-approach-to-the-tracking-of-partials-1901.05044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-linear-programming-approach-to-the-tracking-of-partials-1901.05044"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-separation-of-non-stationary-sound-fields-on-spheres-1901.05122</loc><lastmod>2019-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-separation-of-non-stationary-sound-fields-on-spheres-1901.05122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-separation-of-non-stationary-sound-fields-on-spheres-1901.05122"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-sound-absorbing-materials-in-a-room-from-a-single-impulse-response-using-a-crnn-1901.05852</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-sound-absorbing-materials-in-a-room-from-a-single-impulse-response-using-a-crnn-1901.05852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-sound-absorbing-materials-in-a-room-from-a-single-impulse-response-using-a-crnn-1901.05852"/></url>
<url><loc>https://scifaro.com/en/abs/learning-sound-representations-using-trainable-cope-feature-extractors-1901.06904</loc><lastmod>2019-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-sound-representations-using-trainable-cope-feature-extractors-1901.06904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-sound-representations-using-trainable-cope-feature-extractors-1901.06904"/></url>
<url><loc>https://scifaro.com/en/abs/non-linear-time-compression-of-clear-and-normal-speech-at-high-rates-1901.07239</loc><lastmod>2019-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-linear-time-compression-of-clear-and-normal-speech-at-high-rates-1901.07239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-linear-time-compression-of-clear-and-normal-speech-at-high-rates-1901.07239"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-networks-for-connectionist-temporal-classification-in-speech-recognition-1901.10055</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-networks-for-connectionist-temporal-classification-in-speech-recognition-1901.10055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-networks-for-connectionist-temporal-classification-in-speech-recognition-1901.10055"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-sampling-audio-adversarial-example-attack-1901.10300</loc><lastmod>2024-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-sampling-audio-adversarial-example-attack-1901.10300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-sampling-audio-adversarial-example-attack-1901.10300"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-neural-network-model-based-on-neutrosophy-for-noisy-speech-recognition-1901.10629</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-neural-network-model-based-on-neutrosophy-for-noisy-speech-recognition-1901.10629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-neural-network-model-based-on-neutrosophy-for-noisy-speech-recognition-1901.10629"/></url>
<url><loc>https://scifaro.com/en/abs/additive-margin-sincnet-for-speaker-recognition-1901.10826</loc><lastmod>2019-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/additive-margin-sincnet-for-speaker-recognition-1901.10826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/additive-margin-sincnet-for-speaker-recognition-1901.10826"/></url>
<url><loc>https://scifaro.com/en/abs/multi-layered-cepstrum-for-instantaneous-frequency-estimation-1902.00539</loc><lastmod>2019-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-layered-cepstrum-for-instantaneous-frequency-estimation-1902.00539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-layered-cepstrum-for-instantaneous-frequency-estimation-1902.00539"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-by-partial-auc-optimization-with-mahalanobis-distance-metric-learning-1902.00889</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-by-partial-auc-optimization-with-mahalanobis-distance-metric-learning-1902.00889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-by-partial-auc-optimization-with-mahalanobis-distance-metric-learning-1902.00889"/></url>
<url><loc>https://scifaro.com/en/abs/overlap-add-windows-with-maximum-energy-concentration-for-speech-and-audio-processing-1902.01053</loc><lastmod>2019-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overlap-add-windows-with-maximum-energy-concentration-for-speech-and-audio-processing-1902.01053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overlap-add-windows-with-maximum-energy-concentration-for-speech-and-audio-processing-1902.01053"/></url>
<url><loc>https://scifaro.com/en/abs/active-acoustic-source-tracking-exploiting-particle-filtering-and-monte-carlo-tree-search-1902.01299</loc><lastmod>2019-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-acoustic-source-tracking-exploiting-particle-filtering-and-monte-carlo-tree-search-1902.01299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-acoustic-source-tracking-exploiting-particle-filtering-and-monte-carlo-tree-search-1902.01299"/></url>
<url><loc>https://scifaro.com/en/abs/using-multi-task-learning-to-improve-the-performance-of-acoustic-to-word-and-conventional-hybrid-models-1902.01951</loc><lastmod>2019-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-multi-task-learning-to-improve-the-performance-of-acoustic-to-word-and-conventional-hybrid-models-1902.01951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-multi-task-learning-to-improve-the-performance-of-acoustic-to-word-and-conventional-hybrid-models-1902.01951"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-losses-based-on-speaker-basis-vectors-and-all-speaker-hard-negative-mining-for-speaker-verification-1902.02455</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-losses-based-on-speaker-basis-vectors-and-all-speaker-hard-negative-mining-for-speaker-verification-1902.02455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-losses-based-on-speaker-basis-vectors-and-all-speaker-hard-negative-mining-for-speaker-verification-1902.02455"/></url>
<url><loc>https://scifaro.com/en/abs/conv-codes-audio-hashing-for-bird-species-classification-1902.02498</loc><lastmod>2019-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conv-codes-audio-hashing-for-bird-species-classification-1902.02498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conv-codes-audio-hashing-for-bird-species-classification-1902.02498"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-extraction-for-overlapped-multi-talker-speaker-verification-1902.02546</loc><lastmod>2019-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-extraction-for-overlapped-multi-talker-speaker-verification-1902.02546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-extraction-for-overlapped-multi-talker-speaker-verification-1902.02546"/></url>
<url><loc>https://scifaro.com/en/abs/a-vocoder-free-wavenet-voice-conversion-with-non-parallel-data-1902.03705</loc><lastmod>2019-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-vocoder-free-wavenet-voice-conversion-with-non-parallel-data-1902.03705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-vocoder-free-wavenet-voice-conversion-with-non-parallel-data-1902.03705"/></url>
<url><loc>https://scifaro.com/en/abs/recurrent-neural-networks-with-stochastic-layers-for-acoustic-novelty-detection-1902.04980</loc><lastmod>2019-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recurrent-neural-networks-with-stochastic-layers-for-acoustic-novelty-detection-1902.04980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recurrent-neural-networks-with-stochastic-layers-for-acoustic-novelty-detection-1902.04980"/></url>
<url><loc>https://scifaro.com/en/abs/theory-plus-code-documentation-of-the-depam-workflow-for-soundscape-description-1902.06659</loc><lastmod>2019-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theory-plus-code-documentation-of-the-depam-workflow-for-soundscape-description-1902.06659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theory-plus-code-documentation-of-the-depam-workflow-for-soundscape-description-1902.06659"/></url>
<url><loc>https://scifaro.com/en/abs/securing-voice-driven-interfaces-against-fake-cloned-audio-attacks-1902.06782</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/securing-voice-driven-interfaces-against-fake-cloned-audio-attacks-1902.06782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/securing-voice-driven-interfaces-against-fake-cloned-audio-attacks-1902.06782"/></url>
<url><loc>https://scifaro.com/en/abs/a-spelling-correction-model-for-end-to-end-speech-recognition-1902.07178</loc><lastmod>2019-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-spelling-correction-model-for-end-to-end-speech-recognition-1902.07178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-spelling-correction-model-for-end-to-end-speech-recognition-1902.07178"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-end-to-end-language-identification-using-attention-based-cnn-blstm-1902.07374</loc><lastmod>2019-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-end-to-end-language-identification-using-attention-based-cnn-blstm-1902.07374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-end-to-end-language-identification-using-attention-based-cnn-blstm-1902.07374"/></url>
<url><loc>https://scifaro.com/en/abs/all-neural-online-source-separation-counting-and-diarization-for-meeting-analysis-1902.07881</loc><lastmod>2019-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-neural-online-source-separation-counting-and-diarization-for-meeting-analysis-1902.07881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-neural-online-source-separation-counting-and-diarization-for-meeting-analysis-1902.07881"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-transfer-learning-in-two-pass-information-bottleneck-based-speaker-diarization-system-for-meetings-1902.08051</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-transfer-learning-in-two-pass-information-bottleneck-based-speaker-diarization-system-for-meetings-1902.08051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-transfer-learning-in-two-pass-information-bottleneck-based-speaker-diarization-system-for-meetings-1902.08051"/></url>
<url><loc>https://scifaro.com/en/abs/channel-adversarial-training-for-cross-channel-text-independent-speaker-recognition-1902.09074</loc><lastmod>2019-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-adversarial-training-for-cross-channel-text-independent-speaker-recognition-1902.09074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-adversarial-training-for-cross-channel-text-independent-speaker-recognition-1902.09074"/></url>
<url><loc>https://scifaro.com/en/abs/directional-embedding-based-semi-supervised-framework-for-bird-vocalization-segmentation-1902.09765</loc><lastmod>2019-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-embedding-based-semi-supervised-framework-for-bird-vocalization-segmentation-1902.09765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-embedding-based-semi-supervised-framework-for-bird-vocalization-segmentation-1902.09765"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-aggregation-for-speaker-recognition-in-the-wild-1902.10107</loc><lastmod>2019-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-aggregation-for-speaker-recognition-in-the-wild-1902.10107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-aggregation-for-speaker-recognition-in-the-wild-1902.10107"/></url>
<url><loc>https://scifaro.com/en/abs/the-voices-from-a-distance-challenge-2019-evaluation-plan-1902.10828</loc><lastmod>2019-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voices-from-a-distance-challenge-2019-evaluation-plan-1902.10828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voices-from-a-distance-challenge-2019-evaluation-plan-1902.10828"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-detection-classification-and-doa-estimation-of-unmanned-aerial-vehicle-1902.11130</loc><lastmod>2019-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-detection-classification-and-doa-estimation-of-unmanned-aerial-vehicle-1902.11130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-detection-classification-and-doa-estimation-of-unmanned-aerial-vehicle-1902.11130"/></url>
<url><loc>https://scifaro.com/en/abs/analysing-deep-learning-spectral-envelope-prediction-methods-for-singing-synthesis-1903.01161</loc><lastmod>2019-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysing-deep-learning-spectral-envelope-prediction-methods-for-singing-synthesis-1903.01161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysing-deep-learning-spectral-envelope-prediction-methods-for-singing-synthesis-1903.01161"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-impulse-responses-for-wearable-audio-devices-1903.02094</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-impulse-responses-for-wearable-audio-devices-1903.02094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-impulse-responses-for-wearable-audio-devices-1903.02094"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-conversion-with-non-parallel-data-1903.04124</loc><lastmod>2019-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-conversion-with-non-parallel-data-1903.04124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-conversion-with-non-parallel-data-1903.04124"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-gap-between-monaural-speech-enhancement-and-recognition-with-distortion-independent-acoustic-modeling-1903.04567</loc><lastmod>2019-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-gap-between-monaural-speech-enhancement-and-recognition-with-distortion-independent-acoustic-modeling-1903.04567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-gap-between-monaural-speech-enhancement-and-recognition-with-distortion-independent-acoustic-modeling-1903.04567"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-multi-channel-acoustic-modeling-for-distant-speech-recognition-1903.05299</loc><lastmod>2021-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-multi-channel-acoustic-modeling-for-distant-speech-recognition-1903.05299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-multi-channel-acoustic-modeling-for-distant-speech-recognition-1903.05299"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-harmonic-percussive-source-separation-via-convex-optimization-1903.05600</loc><lastmod>2019-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-harmonic-percussive-source-separation-via-convex-optimization-1903.05600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-harmonic-percussive-source-separation-via-convex-optimization-1903.05600"/></url>
<url><loc>https://scifaro.com/en/abs/low-rankness-of-complex-valued-spectrogram-and-its-application-to-phase-aware-audio-processing-1903.05603</loc><lastmod>2019-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rankness-of-complex-valued-spectrogram-and-its-application-to-phase-aware-audio-processing-1903.05603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rankness-of-complex-valued-spectrogram-and-its-application-to-phase-aware-audio-processing-1903.05603"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-network-based-glottal-waveform-model-for-statistical-parametric-speech-synthesis-1903.05955</loc><lastmod>2019-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-network-based-glottal-waveform-model-for-statistical-parametric-speech-synthesis-1903.05955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-network-based-glottal-waveform-model-for-statistical-parametric-speech-synthesis-1903.05955"/></url>
<url><loc>https://scifaro.com/en/abs/multi-geometry-spatial-acoustic-modeling-for-distant-speech-recognition-1903.06539</loc><lastmod>2021-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-geometry-spatial-acoustic-modeling-for-distant-speech-recognition-1903.06539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-geometry-spatial-acoustic-modeling-for-distant-speech-recognition-1903.06539"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-using-neural-networks-1903.06908</loc><lastmod>2019-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-using-neural-networks-1903.06908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-using-neural-networks-1903.06908"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-design-of-perfect-reconstruction-filterbank-for-dnn-based-sound-source-enhancement-1903.08876</loc><lastmod>2019-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-design-of-perfect-reconstruction-filterbank-for-dnn-based-sound-source-enhancement-1903.08876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-design-of-perfect-reconstruction-filterbank-for-dnn-based-sound-source-enhancement-1903.08876"/></url>
<url><loc>https://scifaro.com/en/abs/towards-adversarial-learning-of-speaker-invariant-representation-for-speech-emotion-recognition-1903.09606</loc><lastmod>2019-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-adversarial-learning-of-speaker-invariant-representation-for-speech-emotion-recognition-1903.09606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-adversarial-learning-of-speaker-invariant-representation-for-speech-emotion-recognition-1903.09606"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-speaker-extraction-neural-network-with-magnitude-and-temporal-spectrum-approximation-loss-1903.09952</loc><lastmod>2019-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-speaker-extraction-neural-network-with-magnitude-and-temporal-spectrum-approximation-loss-1903.09952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-speaker-extraction-neural-network-with-magnitude-and-temporal-spectrum-approximation-loss-1903.09952"/></url>
<url><loc>https://scifaro.com/en/abs/imperceptible-robust-and-targeted-adversarial-examples-for-automatic-speech-recognition-1903.10346</loc><lastmod>2019-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imperceptible-robust-and-targeted-adversarial-examples-for-automatic-speech-recognition-1903.10346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imperceptible-robust-and-targeted-adversarial-examples-for-automatic-speech-recognition-1903.10346"/></url>
<url><loc>https://scifaro.com/en/abs/multiscale-cnn-based-deep-metric-learning-for-bioacoustic-classification-overcoming-training-data-scarcity-using-dynamic-triplet-loss-1903.10713</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiscale-cnn-based-deep-metric-learning-for-bioacoustic-classification-overcoming-training-data-scarcity-using-dynamic-triplet-loss-1903.10713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiscale-cnn-based-deep-metric-learning-for-bioacoustic-classification-overcoming-training-data-scarcity-using-dynamic-triplet-loss-1903.10713"/></url>
<url><loc>https://scifaro.com/en/abs/irrelevant-speech-effect-in-open-plan-offices-a-laboratory-study-1903.11386</loc><lastmod>2019-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/irrelevant-speech-effect-in-open-plan-offices-a-laboratory-study-1903.11386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/irrelevant-speech-effect-in-open-plan-offices-a-laboratory-study-1903.11386"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-with-high-order-statistics-for-x-vector-based-text-independent-speaker-verification-1903.12058</loc><lastmod>2019-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-with-high-order-statistics-for-x-vector-based-text-independent-speaker-verification-1903.12058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-with-high-order-statistics-for-x-vector-based-text-independent-speaker-verification-1903.12058"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-wideband-neural-vocoder-at-1-6-kb-s-using-lpcnet-1903.12087</loc><lastmod>2019-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-wideband-neural-vocoder-at-1-6-kb-s-using-lpcnet-1903.12087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-wideband-neural-vocoder-at-1-6-kb-s-using-lpcnet-1903.12087"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-embeddings-with-gating-mechanisms-for-text-independent-speaker-verification-1903.12092</loc><lastmod>2019-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-embeddings-with-gating-mechanisms-for-text-independent-speaker-verification-1903.12092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-embeddings-with-gating-mechanisms-for-text-independent-speaker-verification-1903.12092"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-approximate-inference-for-speech-to-electroglottograph-conversion-1903.12248</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-approximate-inference-for-speech-to-electroglottograph-conversion-1903.12248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-approximate-inference-for-speech-to-electroglottograph-conversion-1903.12248"/></url>
<url><loc>https://scifaro.com/en/abs/does-the-lombard-effect-improve-emotional-communication-in-noise-analysis-of-emotional-speech-acted-in-noise-1903.12316</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-the-lombard-effect-improve-emotional-communication-in-noise-analysis-of-emotional-speech-acted-in-noise-1903.12316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-the-lombard-effect-improve-emotional-communication-in-noise-analysis-of-emotional-speech-acted-in-noise-1903.12316"/></url>
<url><loc>https://scifaro.com/en/abs/joint-training-framework-for-text-to-speech-and-voice-conversion-using-multi-source-tacotron-and-wavenet-1903.12389</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-training-framework-for-text-to-speech-and-voice-conversion-using-multi-source-tacotron-and-wavenet-1903.12389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-training-framework-for-text-to-speech-and-voice-conversion-using-multi-source-tacotron-and-wavenet-1903.12389"/></url>
<url><loc>https://scifaro.com/en/abs/training-a-neural-speech-waveform-model-using-spectral-losses-of-short-time-fourier-transform-and-continuous-wavelet-transform-1903.12392</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-a-neural-speech-waveform-model-using-spectral-losses-of-short-time-fourier-transform-and-continuous-wavelet-transform-1903.12392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-a-neural-speech-waveform-model-using-spectral-losses-of-short-time-fourier-transform-and-continuous-wavelet-transform-1903.12392"/></url>
<url><loc>https://scifaro.com/en/abs/ustcspeech-system-for-voices-from-a-distance-challenge-2019-1903.12428</loc><lastmod>2019-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ustcspeech-system-for-voices-from-a-distance-challenge-2019-1903.12428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ustcspeech-system-for-voices-from-a-distance-challenge-2019-1903.12428"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-approach-to-automatic-speech-assessment-for-cantonese-speaking-people-with-aphasia-1904.00361</loc><lastmod>2019-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-approach-to-automatic-speech-assessment-for-cantonese-speaking-people-with-aphasia-1904.00361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-approach-to-automatic-speech-assessment-for-cantonese-speaking-people-with-aphasia-1904.00361"/></url>
<url><loc>https://scifaro.com/en/abs/training-multi-speaker-neural-text-to-speech-systems-using-speaker-imbalanced-speech-corpora-1904.00771</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-multi-speaker-neural-text-to-speech-systems-using-speaker-imbalanced-speech-corpora-1904.00771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-multi-speaker-neural-text-to-speech-systems-using-speaker-imbalanced-speech-corpora-1904.00771"/></url>
<url><loc>https://scifaro.com/en/abs/room-geometry-estimation-from-room-impulse-responses-using-convolutional-neural-networks-1904.00869</loc><lastmod>2019-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-geometry-estimation-from-room-impulse-responses-using-convolutional-neural-networks-1904.00869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-geometry-estimation-from-room-impulse-responses-using-convolutional-neural-networks-1904.00869"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-by-parametric-resynthesis-1904.01537</loc><lastmod>2019-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-by-parametric-resynthesis-1904.01537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-by-parametric-resynthesis-1904.01537"/></url>
<url><loc>https://scifaro.com/en/abs/learning-shared-encoding-representation-for-end-to-end-speech-recognition-models-1904.02147</loc><lastmod>2019-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-shared-encoding-representation-for-end-to-end-speech-recognition-models-1904.02147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-shared-encoding-representation-for-end-to-end-speech-recognition-models-1904.02147"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-of-sound-events-with-hidden-imbalances-based-on-clustering-and-separate-sub-dictionary-learning-1904.02852</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-of-sound-events-with-hidden-imbalances-based-on-clustering-and-separate-sub-dictionary-learning-1904.02852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-of-sound-events-with-hidden-imbalances-based-on-clustering-and-separate-sub-dictionary-learning-1904.02852"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-features-for-robust-detection-of-acoustic-events-in-sleep-disordered-breathing-1904.02992</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-features-for-robust-detection-of-acoustic-events-in-sleep-disordered-breathing-1904.02992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-features-for-robust-detection-of-acoustic-events-in-sleep-disordered-breathing-1904.02992"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-deep-neural-networks-and-head-movements-for-robust-binaural-localisation-of-multiple-sources-in-reverberant-environments-1904.03001</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-deep-neural-networks-and-head-movements-for-robust-binaural-localisation-of-multiple-sources-in-reverberant-environments-1904.03001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-deep-neural-networks-and-head-movements-for-robust-binaural-localisation-of-multiple-sources-in-reverberant-environments-1904.03001"/></url>
<url><loc>https://scifaro.com/en/abs/robust-binaural-localization-of-a-target-sound-source-by-combining-spectral-source-models-and-deep-neural-networks-1904.03006</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-binaural-localization-of-a-target-sound-source-by-combining-spectral-source-models-and-deep-neural-networks-1904.03006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-binaural-localization-of-a-target-sound-source-by-combining-spectral-source-models-and-deep-neural-networks-1904.03006"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-low-latency-speech-enhancement-with-rt-gcc-nmf-1904.03130</loc><lastmod>2019-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-low-latency-speech-enhancement-with-rt-gcc-nmf-1904.03130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-low-latency-speech-enhancement-with-rt-gcc-nmf-1904.03130"/></url>
<url><loc>https://scifaro.com/en/abs/jasper-an-end-to-end-convolutional-neural-acoustic-model-1904.03288</loc><lastmod>2019-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jasper-an-end-to-end-convolutional-neural-acoustic-model-1904.03288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jasper-an-end-to-end-convolutional-neural-acoustic-model-1904.03288"/></url>
<url><loc>https://scifaro.com/en/abs/voiceid-loss-speech-enhancement-for-speaker-verification-1904.03601</loc><lastmod>2019-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceid-loss-speech-enhancement-for-speaker-verification-1904.03601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceid-loss-speech-enhancement-for-speaker-verification-1904.03601"/></url>
<url><loc>https://scifaro.com/en/abs/speech-model-pre-training-for-end-to-end-spoken-language-understanding-1904.03670</loc><lastmod>2019-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-model-pre-training-for-end-to-end-spoken-language-understanding-1904.03670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-model-pre-training-for-end-to-end-spoken-language-understanding-1904.03670"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-audio-visual-speech-separation-1904.03760</loc><lastmod>2019-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-audio-visual-speech-separation-1904.03760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-audio-visual-speech-separation-1904.03760"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speaker-dependent-separation-for-chime-5-challenge-1904.03792</loc><lastmod>2019-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speaker-dependent-separation-for-chime-5-challenge-1904.03792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speaker-dependent-separation-for-chime-5-challenge-1904.03792"/></url>
<url><loc>https://scifaro.com/en/abs/gelp-gan-excited-linear-prediction-for-speech-synthesis-from-mel-spectrogram-1904.03976</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gelp-gan-excited-linear-prediction-for-speech-synthesis-from-mel-spectrogram-1904.03976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gelp-gan-excited-linear-prediction-for-speech-synthesis-from-mel-spectrogram-1904.03976"/></url>
<url><loc>https://scifaro.com/en/abs/parrotron-an-end-to-end-speech-to-speech-conversion-model-and-its-applications-to-hearing-impaired-speech-and-speech-separation-1904.04169</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parrotron-an-end-to-end-speech-to-speech-conversion-model-and-its-applications-to-hearing-impaired-speech-and-speech-separation-1904.04169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parrotron-an-end-to-end-speech-to-speech-conversion-model-and-its-applications-to-hearing-impaired-speech-and-speech-separation-1904.04169"/></url>
<url><loc>https://scifaro.com/en/abs/factorization-of-discriminatively-trained-i-vector-extractor-for-speaker-recognition-1904.04235</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/factorization-of-discriminatively-trained-i-vector-extractor-for-speaker-recognition-1904.04235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/factorization-of-discriminatively-trained-i-vector-extractor-for-speaker-recognition-1904.04235"/></url>
<url><loc>https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-1904.04240</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-1904.04240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mce-2018-the-1st-multi-target-speaker-detection-and-identification-challenge-evaluation-1904.04240"/></url>
<url><loc>https://scifaro.com/en/abs/audio-classification-of-bit-representation-waveform-1904.04364</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-classification-of-bit-representation-waveform-1904.04364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-classification-of-bit-representation-waveform-1904.04364"/></url>
<url><loc>https://scifaro.com/en/abs/probability-density-distillation-with-generative-adversarial-networks-for-high-quality-parallel-waveform-generation-1904.04472</loc><lastmod>2019-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probability-density-distillation-with-generative-adversarial-networks-for-high-quality-parallel-waveform-generation-1904.04472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probability-density-distillation-with-generative-adversarial-networks-for-high-quality-parallel-waveform-generation-1904.04472"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-speech-enhancement-with-residual-connections-1904.04511</loc><lastmod>2019-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-speech-enhancement-with-residual-connections-1904.04511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-speech-enhancement-with-residual-connections-1904.04511"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-models-for-spoofing-detection-in-automatic-speaker-verification-1904.04589</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-models-for-spoofing-detection-in-automatic-speaker-verification-1904.04589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-models-for-spoofing-detection-in-automatic-speaker-verification-1904.04589"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-wide-residual-networks-in-reverberant-environments-1904.05167</loc><lastmod>2019-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-wide-residual-networks-in-reverberant-environments-1904.05167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-wide-residual-networks-in-reverberant-environments-1904.05167"/></url>
<url><loc>https://scifaro.com/en/abs/rawnet-fast-end-to-end-neural-vocoder-1904.05351</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rawnet-fast-end-to-end-neural-vocoder-1904.05351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rawnet-fast-end-to-end-neural-vocoder-1904.05351"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2019-future-horizons-in-spoofed-and-fake-audio-detection-1904.05441</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2019-future-horizons-in-spoofed-and-fake-audio-detection-1904.05441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2019-future-horizons-in-spoofed-and-fake-audio-detection-1904.05441"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-domain-adaptation-based-on-disentangled-representation-learning-for-robust-speech-recognition-1904.06086</loc><lastmod>2019-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-domain-adaptation-based-on-disentangled-representation-learning-for-robust-speech-recognition-1904.06086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-domain-adaptation-based-on-disentangled-representation-learning-for-robust-speech-recognition-1904.06086"/></url>
<url><loc>https://scifaro.com/en/abs/examining-the-mapping-functions-of-denoising-autoencoders-in-singing-voice-separation-1904.06157</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/examining-the-mapping-functions-of-denoising-autoencoders-in-singing-voice-separation-1904.06157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/examining-the-mapping-functions-of-denoising-autoencoders-in-singing-voice-separation-1904.06157"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-speaker-independent-continuous-speech-separation-1904.06478</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-speaker-independent-continuous-speech-separation-1904.06478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-speaker-independent-continuous-speech-separation-1904.06478"/></url>
<url><loc>https://scifaro.com/en/abs/audio-compression-using-graph-based-transform-1904.06588</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-compression-using-graph-based-transform-1904.06588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-compression-using-graph-based-transform-1904.06588"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-doa-estimation-method-for-a-linear-microphone-array-under-reverberant-and-noisy-environments-1904.06648</loc><lastmod>2019-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-doa-estimation-method-for-a-linear-microphone-array-under-reverberant-and-noisy-environments-1904.06648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-doa-estimation-method-for-a-linear-microphone-array-under-reverberant-and-noisy-environments-1904.06648"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-based-on-convolutional-neural-networks-1904.06868</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-convolutional-neural-networks-1904.06868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-convolutional-neural-networks-1904.06868"/></url>
<url><loc>https://scifaro.com/en/abs/rhr-net-a-residual-hourglass-recurrent-neural-network-for-speech-enhancement-1904.07294</loc><lastmod>2019-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhr-net-a-residual-hourglass-recurrent-neural-network-for-speech-enhancement-1904.07294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhr-net-a-residual-hourglass-recurrent-neural-network-for-speech-enhancement-1904.07294"/></url>
<url><loc>https://scifaro.com/en/abs/i4u-submission-to-nist-sre-2018-leveraging-from-a-decade-of-shared-experiences-1904.07386</loc><lastmod>2019-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i4u-submission-to-nist-sre-2018-leveraging-from-a-decade-of-shared-experiences-1904.07386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i4u-submission-to-nist-sre-2018-leveraging-from-a-decade-of-shared-experiences-1904.07386"/></url>
<url><loc>https://scifaro.com/en/abs/spoof-detection-using-time-delay-shallow-neural-network-and-feature-switching-1904.07453</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoof-detection-using-time-delay-shallow-neural-network-and-feature-switching-1904.07453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoof-detection-using-time-delay-shallow-neural-network-and-feature-switching-1904.07453"/></url>
<url><loc>https://scifaro.com/en/abs/speechyolo-detection-and-localization-of-speech-objects-1904.07704</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechyolo-detection-and-localization-of-speech-objects-1904.07704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechyolo-detection-and-localization-of-speech-objects-1904.07704"/></url>
<url><loc>https://scifaro.com/en/abs/rawnet-advanced-end-to-end-deep-neural-network-using-raw-waveforms-for-text-independent-speaker-verification-1904.08104</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rawnet-advanced-end-to-end-deep-neural-network-using-raw-waveforms-for-text-independent-speaker-verification-1904.08104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rawnet-advanced-end-to-end-deep-neural-network-using-raw-waveforms-for-text-independent-speaker-verification-1904.08104"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-speech-enhancement-and-recognition-losses-in-limited-resources-multi-talker-single-channel-audio-visual-asr-1904.08248</loc><lastmod>2019-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-speech-enhancement-and-recognition-losses-in-limited-resources-multi-talker-single-channel-audio-visual-asr-1904.08248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-speech-enhancement-and-recognition-losses-in-limited-resources-multi-talker-single-channel-audio-visual-asr-1904.08248"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-speaker-recognition-using-deep-neural-networks-1904.08775</loc><lastmod>2019-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-speaker-recognition-using-deep-neural-networks-1904.08775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-speaker-recognition-using-deep-neural-networks-1904.08775"/></url>
<url><loc>https://scifaro.com/en/abs/specaugment-a-simple-data-augmentation-method-for-automatic-speech-recognition-1904.08779</loc><lastmod>2019-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specaugment-a-simple-data-augmentation-method-for-automatic-speech-recognition-1904.08779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specaugment-a-simple-data-augmentation-method-for-automatic-speech-recognition-1904.08779"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-native-language-information-for-improved-accented-speech-recognition-1904.09038</loc><lastmod>2019-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-native-language-information-for-improved-accented-speech-recognition-1904.09038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-native-language-information-for-improved-accented-speech-recognition-1904.09038"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-end-to-end-multichannel-speech-recognition-for-reverberant-and-mismatch-conditions-1904.09049</loc><lastmod>2019-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-end-to-end-multichannel-speech-recognition-for-reverberant-and-mismatch-conditions-1904.09049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-end-to-end-multichannel-speech-recognition-for-reverberant-and-mismatch-conditions-1904.09049"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-spelling-correction-with-transformer-for-ctc-based-end-to-end-speech-recognition-1904.10045</loc><lastmod>2019-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-spelling-correction-with-transformer-for-ctc-based-end-to-end-speech-recognition-1904.10045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-spelling-correction-with-transformer-for-ctc-based-end-to-end-speech-recognition-1904.10045"/></url>
<url><loc>https://scifaro.com/en/abs/replay-attack-detection-with-complementary-high-resolution-information-using-end-to-end-dnn-for-the-asvspoof-2019-challenge-1904.10134</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/replay-attack-detection-with-complementary-high-resolution-information-using-end-to-end-dnn-for-the-asvspoof-2019-challenge-1904.10134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/replay-attack-detection-with-complementary-high-resolution-information-using-end-to-end-dnn-for-the-asvspoof-2019-challenge-1904.10134"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-teacher-student-learning-with-soft-labels-1904.10135</loc><lastmod>2019-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-teacher-student-learning-with-soft-labels-1904.10135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-teacher-student-learning-with-soft-labels-1904.10135"/></url>
<url><loc>https://scifaro.com/en/abs/towards-joint-sound-scene-and-polyphonic-sound-event-recognition-1904.10408</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-joint-sound-scene-and-polyphonic-sound-event-recognition-1904.10408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-joint-sound-scene-and-polyphonic-sound-event-recognition-1904.10408"/></url>
<url><loc>https://scifaro.com/en/abs/the-analogue-computer-as-a-voltage-controlled-synthesiser-1904.10763</loc><lastmod>2019-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-analogue-computer-as-a-voltage-controlled-synthesiser-1904.10763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-analogue-computer-as-a-voltage-controlled-synthesiser-1904.10763"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-multi-hop-attention-mechanism-1904.10788</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-multi-hop-attention-mechanism-1904.10788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-multi-hop-attention-mechanism-1904.10788"/></url>
<url><loc>https://scifaro.com/en/abs/latent-class-model-with-application-to-speaker-diarization-1904.11130</loc><lastmod>2019-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-class-model-with-application-to-speaker-diarization-1904.11130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-class-model-with-application-to-speaker-diarization-1904.11130"/></url>
<url><loc>https://scifaro.com/en/abs/improving-deep-speech-denoising-by-noisy2noisy-signal-mapping-1904.12069</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-deep-speech-denoising-by-noisy2noisy-signal-mapping-1904.12069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-deep-speech-denoising-by-noisy2noisy-signal-mapping-1904.12069"/></url>
<url><loc>https://scifaro.com/en/abs/neural-source-filter-waveform-models-for-statistical-parametric-speech-synthesis-1904.12088</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-source-filter-waveform-models-for-statistical-parametric-speech-synthesis-1904.12088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-source-filter-waveform-models-for-statistical-parametric-speech-synthesis-1904.12088"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-acoustic-event-detection-based-on-tri-training-1904.12926</loc><lastmod>2019-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-acoustic-event-detection-based-on-tri-training-1904.12926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-acoustic-event-detection-based-on-tri-training-1904.12926"/></url>
<url><loc>https://scifaro.com/en/abs/interfacing-pdm-mems-microphones-with-pfm-spiking-systems-application-for-neuromorphic-auditory-sensors-1905.00390</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interfacing-pdm-mems-microphones-with-pfm-spiking-systems-application-for-neuromorphic-auditory-sensors-1905.00390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interfacing-pdm-mems-microphones-with-pfm-spiking-systems-application-for-neuromorphic-auditory-sensors-1905.00390"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-lightweight-and-adaptable-tts-using-lpcnet-1905.00590</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-lightweight-and-adaptable-tts-using-lpcnet-1905.00590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-lightweight-and-adaptable-tts-using-lpcnet-1905.00590"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-f0-conditioning-and-fully-convolutional-networks-in-variational-autoencoder-based-voice-conversion-1905.00615</loc><lastmod>2019-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-f0-conditioning-and-fully-convolutional-networks-in-variational-autoencoder-based-voice-conversion-1905.00615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-f0-conditioning-and-fully-convolutional-networks-in-variational-autoencoder-based-voice-conversion-1905.00615"/></url>
<url><loc>https://scifaro.com/en/abs/psychoacoustically-motivated-audio-declipping-based-on-weighted-l1-minimization-1905.00628</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/psychoacoustically-motivated-audio-declipping-based-on-weighted-l1-minimization-1905.00628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/psychoacoustically-motivated-audio-declipping-based-on-weighted-l1-minimization-1905.00628"/></url>
<url><loc>https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-low-rank-matrix-factorization-and-quantization-training-1905.00855</loc><lastmod>2019-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-low-rank-matrix-factorization-and-quantization-training-1905.00855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-low-rank-matrix-factorization-and-quantization-training-1905.00855"/></url>
<url><loc>https://scifaro.com/en/abs/city-classification-from-multiple-real-world-sound-scenes-1905.00979</loc><lastmod>2019-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/city-classification-from-multiple-real-world-sound-scenes-1905.00979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/city-classification-from-multiple-real-world-sound-scenes-1905.00979"/></url>
<url><loc>https://scifaro.com/en/abs/a-feature-learning-siamese-model-for-intelligent-control-of-the-dynamic-range-compressor-1905.01022</loc><lastmod>2019-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-feature-learning-siamese-model-for-intelligent-control-of-the-dynamic-range-compressor-1905.01022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-feature-learning-siamese-model-for-intelligent-control-of-the-dynamic-range-compressor-1905.01022"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-sequence-to-sequence-asr-using-unpaired-speech-and-text-1905.01152</loc><lastmod>2019-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-sequence-to-sequence-asr-using-unpaired-speech-and-text-1905.01152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-sequence-to-sequence-asr-using-unpaired-speech-and-text-1905.01152"/></url>
<url><loc>https://scifaro.com/en/abs/many-to-many-voice-conversion-with-out-of-dataset-speaker-support-1905.02525</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-with-out-of-dataset-speaker-support-1905.02525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-with-out-of-dataset-speaker-support-1905.02525"/></url>
<url><loc>https://scifaro.com/en/abs/meeting-transcription-using-virtual-microphone-arrays-1905.02545</loc><lastmod>2019-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meeting-transcription-using-virtual-microphone-arrays-1905.02545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meeting-transcription-using-virtual-microphone-arrays-1905.02545"/></url>
<url><loc>https://scifaro.com/en/abs/transparent-pronunciation-scoring-using-articulatorily-weighted-phoneme-edit-distance-1905.02639</loc><lastmod>2019-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transparent-pronunciation-scoring-using-articulatorily-weighted-phoneme-edit-distance-1905.02639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transparent-pronunciation-scoring-using-articulatorily-weighted-phoneme-edit-distance-1905.02639"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-speech-emotion-recognition-with-ladder-networks-1905.02921</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-speech-emotion-recognition-with-ladder-networks-1905.02921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-speech-emotion-recognition-with-ladder-networks-1905.02921"/></url>
<url><loc>https://scifaro.com/en/abs/adversarially-trained-autoencoders-for-parallel-data-free-voice-conversion-1905.03864</loc><lastmod>2019-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarially-trained-autoencoders-for-parallel-data-free-voice-conversion-1905.03864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarially-trained-autoencoders-for-parallel-data-free-voice-conversion-1905.03864"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-lcmv-beamforming-with-partial-noise-estimation-1905.04050</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-lcmv-beamforming-with-partial-noise-estimation-1905.04050"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-lcmv-beamforming-with-partial-noise-estimation-1905.04050"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-and-population-based-training-for-voice-commands-recognition-1905.04230</loc><lastmod>2019-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-and-population-based-training-for-voice-commands-recognition-1905.04230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-and-population-based-training-for-voice-commands-recognition-1905.04230"/></url>
<url><loc>https://scifaro.com/en/abs/improving-opus-low-bit-rate-quality-with-neural-speech-synthesis-1905.04628</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-opus-low-bit-rate-quality-with-neural-speech-synthesis-1905.04628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-opus-low-bit-rate-quality-with-neural-speech-synthesis-1905.04628"/></url>
<url><loc>https://scifaro.com/en/abs/autovc-zero-shot-voice-style-transfer-with-only-autoencoder-loss-1905.05879</loc><lastmod>2019-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autovc-zero-shot-voice-style-transfer-with-only-autoencoder-loss-1905.05879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autovc-zero-shot-voice-style-transfer-with-only-autoencoder-loss-1905.05879"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-purpose-deep-learning-approach-to-model-time-varying-audio-effects-1905.06148</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-purpose-deep-learning-approach-to-model-time-varying-audio-effects-1905.06148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-purpose-deep-learning-approach-to-model-time-varying-audio-effects-1905.06148"/></url>
<url><loc>https://scifaro.com/en/abs/almost-unsupervised-text-to-speech-and-automatic-speech-recognition-1905.06791</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/almost-unsupervised-text-to-speech-and-automatic-speech-recognition-1905.06791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/almost-unsupervised-text-to-speech-and-automatic-speech-recognition-1905.06791"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-speech-driven-visual-speech-synthesis-using-domain-adapted-acoustic-models-1905.06860</loc><lastmod>2019-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-speech-driven-visual-speech-synthesis-using-domain-adapted-acoustic-models-1905.06860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-speech-driven-visual-speech-synthesis-using-domain-adapted-acoustic-models-1905.06860"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-adaptation-with-backpropagation-through-wfst-for-on-device-speech-recognition-system-1905.07149</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-adaptation-with-backpropagation-through-wfst-for-on-device-speech-recognition-system-1905.07149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-adaptation-with-backpropagation-through-wfst-for-on-device-speech-recognition-system-1905.07149"/></url>
<url><loc>https://scifaro.com/en/abs/effective-parameter-estimation-methods-for-an-excitnet-model-in-generative-text-to-speech-systems-1905.08486</loc><lastmod>2019-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-parameter-estimation-methods-for-an-excitnet-model-in-generative-text-to-speech-systems-1905.08486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-parameter-estimation-methods-for-an-excitnet-model-in-generative-text-to-speech-systems-1905.08486"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-speech-presence-probability-estimation-for-multi-frame-single-microphone-speech-enhancement-1905.08492</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-speech-presence-probability-estimation-for-multi-frame-single-microphone-speech-enhancement-1905.08492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-speech-presence-probability-estimation-for-multi-frame-single-microphone-speech-enhancement-1905.08492"/></url>
<url><loc>https://scifaro.com/en/abs/human-vocal-sentiment-analysis-1905.08632</loc><lastmod>2019-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-vocal-sentiment-analysis-1905.08632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-vocal-sentiment-analysis-1905.08632"/></url>
<url><loc>https://scifaro.com/en/abs/a-perceptual-weighting-filter-loss-for-dnn-training-in-speech-enhancement-1905.09754</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-perceptual-weighting-filter-loss-for-dnn-training-in-speech-enhancement-1905.09754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-perceptual-weighting-filter-loss-for-dnn-training-in-speech-enhancement-1905.09754"/></url>
<url><loc>https://scifaro.com/en/abs/fast-computation-of-loudness-using-a-deep-neural-network-1905.10399</loc><lastmod>2019-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-computation-of-loudness-using-a-deep-neural-network-1905.10399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-computation-of-loudness-using-a-deep-neural-network-1905.10399"/></url>
<url><loc>https://scifaro.com/en/abs/uwb-ntis-speaker-diarization-system-for-the-dihard-ii-2019-challenge-1905.11276</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uwb-ntis-speaker-diarization-system-for-the-dihard-ii-2019-challenge-1905.11276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uwb-ntis-speaker-diarization-system-for-the-dihard-ii-2019-challenge-1905.11276"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-quality-control-and-enhancement-for-voice-based-remote-parkinson-s-disease-detection-1905.11785</loc><lastmod>2019-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-quality-control-and-enhancement-for-voice-based-remote-parkinson-s-disease-detection-1905.11785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-quality-control-and-enhancement-for-voice-based-remote-parkinson-s-disease-detection-1905.11785"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-audio-representation-learning-for-mobile-devices-1905.11796</loc><lastmod>2019-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-audio-representation-learning-for-mobile-devices-1905.11796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-audio-representation-learning-for-mobile-devices-1905.11796"/></url>
<url><loc>https://scifaro.com/en/abs/signaltrain-profiling-audio-compressors-with-deep-neural-networks-1905.11928</loc><lastmod>2019-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signaltrain-profiling-audio-compressors-with-deep-neural-networks-1905.11928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signaltrain-profiling-audio-compressors-with-deep-neural-networks-1905.11928"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-the-effectiveness-of-voice-conversion-on-speaker-identification-and-automatic-speech-recognition-systems-1905.12531</loc><lastmod>2019-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-the-effectiveness-of-voice-conversion-on-speaker-identification-and-automatic-speech-recognition-systems-1905.12531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-the-effectiveness-of-voice-conversion-on-speaker-identification-and-automatic-speech-recognition-systems-1905.12531"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-audio-visual-speech-enhancement-in-presence-of-lombard-effect-1905.12605</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-audio-visual-speech-enhancement-in-presence-of-lombard-effect-1905.12605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-audio-visual-speech-enhancement-in-presence-of-lombard-effect-1905.12605"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymization-using-x-vector-and-neural-waveform-models-1905.13561</loc><lastmod>2019-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymization-using-x-vector-and-neural-waveform-models-1905.13561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymization-using-x-vector-and-neural-waveform-models-1905.13561"/></url>
<url><loc>https://scifaro.com/en/abs/musical-composition-style-transfer-via-disentangled-timbre-representations-1905.13567</loc><lastmod>2019-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-composition-style-transfer-via-disentangled-timbre-representations-1905.13567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-composition-style-transfer-via-disentangled-timbre-representations-1905.13567"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-non-aligned-musical-score-transcriptions-with-mv2h-1906.00566</loc><lastmod>2019-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-non-aligned-musical-score-transcriptions-with-mv2h-1906.00566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-non-aligned-musical-score-transcriptions-with-mv2h-1906.00566"/></url>
<url><loc>https://scifaro.com/en/abs/increasing-compactness-of-deep-learning-based-speech-enhancement-models-with-parameter-pruning-and-quantization-techniques-1906.01078</loc><lastmod>2020-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/increasing-compactness-of-deep-learning-based-speech-enhancement-models-with-parameter-pruning-and-quantization-techniques-1906.01078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/increasing-compactness-of-deep-learning-based-speech-enhancement-models-with-parameter-pruning-and-quantization-techniques-1906.01078"/></url>
<url><loc>https://scifaro.com/en/abs/melnet-a-generative-model-for-audio-in-the-frequency-domain-1906.01083</loc><lastmod>2019-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melnet-a-generative-model-for-audio-in-the-frequency-domain-1906.01083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melnet-a-generative-model-for-audio-in-the-frequency-domain-1906.01083"/></url>
<url><loc>https://scifaro.com/en/abs/voice-mimicry-attacks-assisted-by-automatic-speaker-verification-1906.01454</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-mimicry-attacks-assisted-by-automatic-speaker-verification-1906.01454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-mimicry-attacks-assisted-by-automatic-speaker-verification-1906.01454"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-lombard-effect-influence-on-end-to-end-audio-visual-speech-recognition-1906.02112</loc><lastmod>2019-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-lombard-effect-influence-on-end-to-end-audio-visual-speech-recognition-1906.02112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-lombard-effect-influence-on-end-to-end-audio-visual-speech-recognition-1906.02112"/></url>
<url><loc>https://scifaro.com/en/abs/gibbonfindr-an-r-package-for-the-detection-and-classification-of-acoustic-signals-1906.02572</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gibbonfindr-an-r-package-for-the-detection-and-classification-of-acoustic-signals-1906.02572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gibbonfindr-an-r-package-for-the-detection-and-classification-of-acoustic-signals-1906.02572"/></url>
<url><loc>https://scifaro.com/en/abs/role-of-non-linear-data-processing-on-speech-recognition-task-in-the-framework-of-reservoir-computing-1906.02812</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/role-of-non-linear-data-processing-on-speech-recognition-task-in-the-framework-of-reservoir-computing-1906.02812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/role-of-non-linear-data-processing-on-speech-recognition-task-in-the-framework-of-reservoir-computing-1906.02812"/></url>
<url><loc>https://scifaro.com/en/abs/beamforming-and-other-methods-for-denoising-microphone-array-data-1906.02965</loc><lastmod>2019-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beamforming-and-other-methods-for-denoising-microphone-array-data-1906.02965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beamforming-and-other-methods-for-denoising-microphone-array-data-1906.02965"/></url>
<url><loc>https://scifaro.com/en/abs/using-generative-modelling-to-produce-varied-intonation-for-speech-synthesis-1906.04233</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-generative-modelling-to-produce-varied-intonation-for-speech-synthesis-1906.04233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-generative-modelling-to-produce-varied-intonation-for-speech-synthesis-1906.04233"/></url>
<url><loc>https://scifaro.com/en/abs/adjusting-pleasure-arousal-dominance-for-continuous-emotional-text-to-speech-synthesizer-1906.05507</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adjusting-pleasure-arousal-dominance-for-continuous-emotional-text-to-speech-synthesizer-1906.05507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adjusting-pleasure-arousal-dominance-for-continuous-emotional-text-to-speech-synthesizer-1906.05507"/></url>
<url><loc>https://scifaro.com/en/abs/telephonetic-making-neural-language-models-robust-to-asr-and-semantic-noise-1906.05678</loc><lastmod>2019-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/telephonetic-making-neural-language-models-robust-to-asr-and-semantic-noise-1906.05678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/telephonetic-making-neural-language-models-robust-to-asr-and-semantic-noise-1906.05678"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-emotion-recognition-system-using-speech-features-and-transcriptions-1906.05681</loc><lastmod>2019-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-emotion-recognition-system-using-speech-features-and-transcriptions-1906.05681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-emotion-recognition-system-using-speech-features-and-transcriptions-1906.05681"/></url>
<url><loc>https://scifaro.com/en/abs/focal-loss-based-residual-convolutional-neural-network-for-speech-emotion-recognition-1906.05682</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/focal-loss-based-residual-convolutional-neural-network-for-speech-emotion-recognition-1906.05682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/focal-loss-based-residual-convolutional-neural-network-for-speech-emotion-recognition-1906.05682"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-targeted-audio-visual-models-for-speech-recognition-in-cocktail-party-environments-1906.05962</loc><lastmod>2019-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-targeted-audio-visual-models-for-speech-recognition-in-cocktail-party-environments-1906.05962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-targeted-audio-visual-models-for-speech-recognition-in-cocktail-party-environments-1906.05962"/></url>
<url><loc>https://scifaro.com/en/abs/video-driven-speech-reconstruction-using-generative-adversarial-networks-1906.06301</loc><lastmod>2019-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video-driven-speech-reconstruction-using-generative-adversarial-networks-1906.06301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video-driven-speech-reconstruction-using-generative-adversarial-networks-1906.06301"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-based-adversarial-audio-attacks-1906.06355</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-based-adversarial-audio-attacks-1906.06355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-based-adversarial-audio-attacks-1906.06355"/></url>
<url><loc>https://scifaro.com/en/abs/audio-transport-a-generalized-portamento-via-optimal-transport-1906.06763</loc><lastmod>2019-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-transport-a-generalized-portamento-via-optimal-transport-1906.06763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-transport-a-generalized-portamento-via-optimal-transport-1906.06763"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-post-processing-algorithms-for-polyphonic-sound-event-detection-1906.06909</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-post-processing-algorithms-for-polyphonic-sound-event-detection-1906.06909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-post-processing-algorithms-for-polyphonic-sound-event-detection-1906.06909"/></url>
<url><loc>https://scifaro.com/en/abs/digivoice-voice-biomarker-featurization-and-analysis-pipeline-1906.07222</loc><lastmod>2019-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digivoice-voice-biomarker-featurization-and-analysis-pipeline-1906.07222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digivoice-voice-biomarker-featurization-and-analysis-pipeline-1906.07222"/></url>
<url><loc>https://scifaro.com/en/abs/combining-adversarial-training-and-disentangled-speech-representation-for-robust-zero-resource-subword-modeling-1906.07234</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-adversarial-training-and-disentangled-speech-representation-for-robust-zero-resource-subword-modeling-1906.07234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-adversarial-training-and-disentangled-speech-representation-for-robust-zero-resource-subword-modeling-1906.07234"/></url>
<url><loc>https://scifaro.com/en/abs/improving-unsupervised-subword-modeling-via-disentangled-speech-representation-learning-and-transformation-1906.07245</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-unsupervised-subword-modeling-via-disentangled-speech-representation-learning-and-transformation-1906.07245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-unsupervised-subword-modeling-via-disentangled-speech-representation-learning-and-transformation-1906.07245"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-delay-and-sum-beamforming-guided-by-visual-tracking-for-human-robot-interaction-1906.07298</loc><lastmod>2019-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-delay-and-sum-beamforming-guided-by-visual-tracking-for-human-robot-interaction-1906.07298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-delay-and-sum-beamforming-guided-by-visual-tracking-for-human-robot-interaction-1906.07298"/></url>
<url><loc>https://scifaro.com/en/abs/on-combining-features-for-single-channel-robust-speech-recognition-in-reverberant-environments-1906.07299</loc><lastmod>2019-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-combining-features-for-single-channel-robust-speech-recognition-in-reverberant-environments-1906.07299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-combining-features-for-single-channel-robust-speech-recognition-in-reverberant-environments-1906.07299"/></url>
<url><loc>https://scifaro.com/en/abs/margin-matters-towards-more-discriminative-deep-neural-network-embeddings-for-speaker-recognition-1906.07317</loc><lastmod>2019-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/margin-matters-towards-more-discriminative-deep-neural-network-embeddings-for-speaker-recognition-1906.07317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/margin-matters-towards-more-discriminative-deep-neural-network-embeddings-for-speaker-recognition-1906.07317"/></url>
<url><loc>https://scifaro.com/en/abs/deep-xi-as-a-front-end-for-robust-automatic-speech-recognition-1906.07319</loc><lastmod>2020-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-xi-as-a-front-end-for-robust-automatic-speech-recognition-1906.07319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-xi-as-a-front-end-for-robust-automatic-speech-recognition-1906.07319"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-speaker-adaptation-method-for-speech-synthesis-using-transcribed-and-untranscribed-speech-with-backpropagation-1906.07414</loc><lastmod>2019-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-speaker-adaptation-method-for-speech-synthesis-using-transcribed-and-untranscribed-speech-with-backpropagation-1906.07414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-speaker-adaptation-method-for-speech-synthesis-using-transcribed-and-untranscribed-speech-with-backpropagation-1906.07414"/></url>
<url><loc>https://scifaro.com/en/abs/square-root-based-multi-source-early-psd-estimation-and-recursive-retf-update-in-reverberant-environments-by-means-of-the-orthogonal-procrustes-problem-1906.07493</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/square-root-based-multi-source-early-psd-estimation-and-recursive-retf-update-in-reverberant-environments-by-means-of-the-orthogonal-procrustes-problem-1906.07493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/square-root-based-multi-source-early-psd-estimation-and-recursive-retf-update-in-reverberant-environments-by-means-of-the-orthogonal-procrustes-problem-1906.07493"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-sidelobe-cancellation-and-linear-prediction-kalman-filter-for-joint-multi-microphone-speech-dereverberation-interfering-speech-cancellation-and-noise-reduction-1906.07512</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-sidelobe-cancellation-and-linear-prediction-kalman-filter-for-joint-multi-microphone-speech-dereverberation-interfering-speech-cancellation-and-noise-reduction-1906.07512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-sidelobe-cancellation-and-linear-prediction-kalman-filter-for-joint-multi-microphone-speech-dereverberation-interfering-speech-cancellation-and-noise-reduction-1906.07512"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-signal-separation-and-deconvolution-with-generative-adversarial-networks-1906.07552</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-signal-separation-and-deconvolution-with-generative-adversarial-networks-1906.07552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-signal-separation-and-deconvolution-with-generative-adversarial-networks-1906.07552"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-cross-module-residual-learning-towards-lightweight-end-to-end-speech-coding-1906.07769</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-cross-module-residual-learning-towards-lightweight-end-to-end-speech-coding-1906.07769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-cross-module-residual-learning-towards-lightweight-end-to-end-speech-coding-1906.07769"/></url>
<url><loc>https://scifaro.com/en/abs/the-second-dihard-diarization-challenge-dataset-task-and-baselines-1906.07839</loc><lastmod>2019-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-second-dihard-diarization-challenge-dataset-task-and-baselines-1906.07839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-second-dihard-diarization-challenge-dataset-task-and-baselines-1906.07839"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stream-end-to-end-speech-recognition-1906.08041</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stream-end-to-end-speech-recognition-1906.08041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stream-end-to-end-speech-recognition-1906.08041"/></url>
<url><loc>https://scifaro.com/en/abs/real-to-h-space-encoder-for-speech-recognition-1906.08043</loc><lastmod>2019-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-to-h-space-encoder-for-speech-recognition-1906.08043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-to-h-space-encoder-for-speech-recognition-1906.08043"/></url>
<url><loc>https://scifaro.com/en/abs/robust-end-to-end-speaker-verification-using-eeg-1906.08044</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-end-to-end-speaker-verification-using-eeg-1906.08044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-end-to-end-speaker-verification-using-eeg-1906.08044"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-with-no-speech-or-with-noisy-speech-beyond-english-1906.08045</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-with-no-speech-or-with-noisy-speech-beyond-english-1906.08045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-with-no-speech-or-with-noisy-speech-beyond-english-1906.08045"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-pyramid-encoding-with-convex-length-normalization-for-text-independent-speaker-verification-1906.08333</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-pyramid-encoding-with-convex-length-normalization-for-text-independent-speaker-verification-1906.08333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-pyramid-encoding-with-convex-length-normalization-for-text-independent-speaker-verification-1906.08333"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-enhancement-for-melp-speech-codec-in-noisy-communication-environment-1906.08407</loc><lastmod>2019-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-enhancement-for-melp-speech-codec-in-noisy-communication-environment-1906.08407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-enhancement-for-melp-speech-codec-in-noisy-communication-environment-1906.08407"/></url>
<url><loc>https://scifaro.com/en/abs/a-signal-subspace-rotation-method-for-localization-of-multiple-wideband-sound-sources-1906.08847</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-signal-subspace-rotation-method-for-localization-of-multiple-wideband-sound-sources-1906.08847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-signal-subspace-rotation-method-for-localization-of-multiple-wideband-sound-sources-1906.08847"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-speech-recognition-with-no-speech-or-with-noisy-speech-1906.08871</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-speech-recognition-with-no-speech-or-with-noisy-speech-1906.08871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-speech-recognition-with-no-speech-or-with-noisy-speech-1906.08871"/></url>
<url><loc>https://scifaro.com/en/abs/mirage-2d-source-localization-using-microphone-pair-augmentation-with-echoes-1906.08968</loc><lastmod>2019-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mirage-2d-source-localization-using-microphone-pair-augmentation-with-echoes-1906.08968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mirage-2d-source-localization-using-microphone-pair-augmentation-with-echoes-1906.08968"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-asr-for-code-switched-hindi-english-speech-1906.09426</loc><lastmod>2019-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-asr-for-code-switched-hindi-english-speech-1906.09426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-asr-for-code-switched-hindi-english-speech-1906.09426"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-modeling-for-automatic-lyrics-to-audio-alignment-1906.10369</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-modeling-for-automatic-lyrics-to-audio-alignment-1906.10369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-modeling-for-automatic-lyrics-to-audio-alignment-1906.10369"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-sequence-to-sequence-voice-conversion-with-disentangled-linguistic-and-speaker-representations-1906.10508</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-sequence-to-sequence-voice-conversion-with-disentangled-linguistic-and-speaker-representations-1906.10508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-sequence-to-sequence-voice-conversion-with-disentangled-linguistic-and-speaker-representations-1906.10508"/></url>
<url><loc>https://scifaro.com/en/abs/dali-a-large-dataset-of-synchronized-audio-lyrics-and-notes-automatically-created-using-teacher-student-machine-learning-paradigm-1906.10606</loc><lastmod>2019-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dali-a-large-dataset-of-synchronized-audio-lyrics-and-notes-automatically-created-using-teacher-student-machine-learning-paradigm-1906.10606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dali-a-large-dataset-of-synchronized-audio-lyrics-and-notes-automatically-created-using-teacher-student-machine-learning-paradigm-1906.10606"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-emotional-speech-synthesis-using-style-tokens-and-semi-supervised-training-1906.10859</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-emotional-speech-synthesis-using-style-tokens-and-semi-supervised-training-1906.10859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-emotional-speech-synthesis-using-style-tokens-and-semi-supervised-training-1906.10859"/></url>
<url><loc>https://scifaro.com/en/abs/integration-of-tensorflow-based-acoustic-model-with-kaldi-wfst-decoder-1906.11018</loc><lastmod>2019-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integration-of-tensorflow-based-acoustic-model-with-kaldi-wfst-decoder-1906.11018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integration-of-tensorflow-based-acoustic-model-with-kaldi-wfst-decoder-1906.11018"/></url>
<url><loc>https://scifaro.com/en/abs/multi-span-acoustic-modelling-using-raw-waveform-signals-1906.11047</loc><lastmod>2019-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-span-acoustic-modelling-using-raw-waveform-signals-1906.11047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-span-acoustic-modelling-using-raw-waveform-signals-1906.11047"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-phoneme-and-word-discovery-from-multiple-speakers-using-double-articulation-analyzer-and-neural-network-with-parametric-bias-1906.11049</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-phoneme-and-word-discovery-from-multiple-speakers-using-double-articulation-analyzer-and-neural-network-with-parametric-bias-1906.11049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-phoneme-and-word-discovery-from-multiple-speakers-using-double-articulation-analyzer-and-neural-network-with-parametric-bias-1906.11049"/></url>
<url><loc>https://scifaro.com/en/abs/re-annotation-of-cough-events-in-the-ami-corpus-1906.11509</loc><lastmod>2019-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/re-annotation-of-cough-events-in-the-ami-corpus-1906.11509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/re-annotation-of-cough-events-in-the-ami-corpus-1906.11509"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-music-classification-with-densenet-and-data-augmentation-1906.11620</loc><lastmod>2019-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-music-classification-with-densenet-and-data-augmentation-1906.11620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-music-classification-with-densenet-and-data-augmentation-1906.11620"/></url>
<url><loc>https://scifaro.com/en/abs/ruslan-russian-spoken-language-corpus-for-speech-synthesis-1906.11645</loc><lastmod>2019-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ruslan-russian-spoken-language-corpus-for-speech-synthesis-1906.11645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ruslan-russian-spoken-language-corpus-for-speech-synthesis-1906.11645"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-sound-source-localization-with-svd-phat-1906.11913</loc><lastmod>2019-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-sound-source-localization-with-svd-phat-1906.11913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-sound-source-localization-with-svd-phat-1906.11913"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-by-adversarial-synthesis-a-novel-approach-for-speech-vocoding-1907.00772</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-by-adversarial-synthesis-a-novel-approach-for-speech-vocoding-1907.00772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-by-adversarial-synthesis-a-novel-approach-for-speech-vocoding-1907.00772"/></url>
<url><loc>https://scifaro.com/en/abs/quasi-periodic-wavenet-vocoder-a-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-1907.00797</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quasi-periodic-wavenet-vocoder-a-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-1907.00797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quasi-periodic-wavenet-vocoder-a-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-1907.00797"/></url>
<url><loc>https://scifaro.com/en/abs/ultrasound-tongue-imaging-for-diarization-and-alignment-of-child-speech-therapy-sessions-1907.00818</loc><lastmod>2019-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultrasound-tongue-imaging-for-diarization-and-alignment-of-child-speech-therapy-sessions-1907.00818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultrasound-tongue-imaging-for-diarization-and-alignment-of-child-speech-therapy-sessions-1907.00818"/></url>
<url><loc>https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-quantized-distillation-1907.00873</loc><lastmod>2019-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-quantized-distillation-1907.00873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compression-of-acoustic-event-detection-models-with-quantized-distillation-1907.00873"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-language-models-for-lvcsr-in-first-pass-decoding-and-lattice-rescoring-1907.01030</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-language-models-for-lvcsr-in-first-pass-decoding-and-lattice-rescoring-1907.01030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-language-models-for-lvcsr-in-first-pass-decoding-and-lattice-rescoring-1907.01030"/></url>
<url><loc>https://scifaro.com/en/abs/conditioned-u-net-introducing-a-control-mechanism-in-the-u-net-for-multiple-source-separations-1907.01277</loc><lastmod>2019-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioned-u-net-introducing-a-control-mechanism-in-the-u-net-for-multiple-source-separations-1907.01277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioned-u-net-introducing-a-control-mechanism-in-the-u-net-for-multiple-source-separations-1907.01277"/></url>
<url><loc>https://scifaro.com/en/abs/lipper-synthesizing-thy-speech-using-multi-view-lipreading-1907.01367</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipper-synthesizing-thy-speech-using-multi-view-lipreading-1907.01367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipper-synthesizing-thy-speech-using-multi-view-lipreading-1907.01367"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-verbal-and-nonverbal-features-for-predicting-group-performance-1907.01369</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-verbal-and-nonverbal-features-for-predicting-group-performance-1907.01369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-verbal-and-nonverbal-features-for-predicting-group-performance-1907.01369"/></url>
<url><loc>https://scifaro.com/en/abs/improving-performance-of-end-to-end-asr-on-numeric-sequences-1907.01372</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-performance-of-end-to-end-asr-on-numeric-sequences-1907.01372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-performance-of-end-to-end-asr-on-numeric-sequences-1907.01372"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-lattice-free-and-lattice-based-sequence-discriminative-training-criteria-for-lvcsr-1907.01409</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-lattice-free-and-lattice-based-sequence-discriminative-training-criteria-for-lvcsr-1907.01409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-lattice-free-and-lattice-based-sequence-discriminative-training-criteria-for-lvcsr-1907.01409"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-classification-of-phonetic-segments-from-raw-ultrasound-in-child-speech-1907.01413</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-classification-of-phonetic-segments-from-raw-ultrasound-in-child-speech-1907.01413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-classification-of-phonetic-segments-from-raw-ultrasound-in-child-speech-1907.01413"/></url>
<url><loc>https://scifaro.com/en/abs/sub-band-convolutional-neural-networks-for-small-footprint-spoken-term-classification-1907.01448</loc><lastmod>2019-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-band-convolutional-neural-networks-for-small-footprint-spoken-term-classification-1907.01448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-band-convolutional-neural-networks-for-small-footprint-spoken-term-classification-1907.01448"/></url>
<url><loc>https://scifaro.com/en/abs/midi-sandwich-multi-model-multi-task-hierarchical-conditional-vae-gan-networks-for-symbolic-single-track-music-generation-1907.01607</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midi-sandwich-multi-model-multi-task-hierarchical-conditional-vae-gan-networks-for-symbolic-single-track-music-generation-1907.01607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midi-sandwich-multi-model-multi-task-hierarchical-conditional-vae-gan-networks-for-symbolic-single-track-music-generation-1907.01607"/></url>
<url><loc>https://scifaro.com/en/abs/attention-model-for-articulatory-features-detection-1907.01914</loc><lastmod>2019-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-model-for-articulatory-features-detection-1907.01914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-model-for-articulatory-features-detection-1907.01914"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-with-high-frame-rate-features-extraction-1907.01957</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-high-frame-rate-features-extraction-1907.01957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-high-frame-rate-features-extraction-1907.01957"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-smiip-system-for-nist-2018-speaker-recognition-evaluation-1907.02191</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-smiip-system-for-nist-2018-speaker-recognition-evaluation-1907.02191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-smiip-system-for-nist-2018-speaker-recognition-evaluation-1907.02191"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-system-for-the-speaker-recognition-task-of-the-2019-voices-from-a-distance-challenge-1907.02194</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-system-for-the-speaker-recognition-task-of-the-2019-voices-from-a-distance-challenge-1907.02194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-system-for-the-speaker-recognition-task-of-the-2019-voices-from-a-distance-challenge-1907.02194"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-robust-prosody-transfer-for-single-speaker-neural-text-to-speech-1907.02479</loc><lastmod>2019-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-robust-prosody-transfer-for-single-speaker-neural-text-to-speech-1907.02479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-robust-prosody-transfer-for-single-speaker-neural-text-to-speech-1907.02479"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-replay-detection-system-for-the-asvspoof-2019-challenge-on-data-augmentation-feature-representation-classification-and-fusion-1907.02663</loc><lastmod>2019-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-replay-detection-system-for-the-asvspoof-2019-challenge-on-data-augmentation-feature-representation-classification-and-fusion-1907.02663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-replay-detection-system-for-the-asvspoof-2019-challenge-on-data-augmentation-feature-representation-classification-and-fusion-1907.02663"/></url>
<url><loc>https://scifaro.com/en/abs/a-methodology-for-controlling-the-emotional-expressiveness-in-synthetic-speech-a-deep-learning-approach-1907.02784</loc><lastmod>2019-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-methodology-for-controlling-the-emotional-expressiveness-in-synthetic-speech-a-deep-learning-approach-1907.02784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-methodology-for-controlling-the-emotional-expressiveness-in-synthetic-speech-a-deep-learning-approach-1907.02784"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-preserving-speaker-recognition-with-cohort-score-normalisation-1907.03454</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-preserving-speaker-recognition-with-cohort-score-normalisation-1907.03454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-preserving-speaker-recognition-with-cohort-score-normalisation-1907.03454"/></url>
<url><loc>https://scifaro.com/en/abs/the-gdpr-speech-data-reflections-of-legal-and-technology-communities-first-steps-towards-a-common-understanding-1907.03458</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-gdpr-speech-data-reflections-of-legal-and-technology-communities-first-steps-towards-a-common-understanding-1907.03458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-gdpr-speech-data-reflections-of-legal-and-technology-communities-first-steps-towards-a-common-understanding-1907.03458"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-deep-learning-model-for-the-detection-and-reconstruction-of-dysarthric-speech-1907.04743</loc><lastmod>2019-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-deep-learning-model-for-the-detection-and-reconstruction-of-dysarthric-speech-1907.04743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-deep-learning-model-for-the-detection-and-reconstruction-of-dysarthric-speech-1907.04743"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-mixed-bandwidth-deep-neural-network-acoustic-modeling-for-automatic-speech-recognition-1907.04887</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-mixed-bandwidth-deep-neural-network-acoustic-modeling-for-automatic-speech-recognition-1907.04887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-mixed-bandwidth-deep-neural-network-acoustic-modeling-for-automatic-speech-recognition-1907.04887"/></url>
<url><loc>https://scifaro.com/en/abs/listen-attend-spell-and-adapt-speaker-adapted-sequence-to-sequence-asr-1907.04916</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-attend-spell-and-adapt-speaker-adapted-sequence-to-sequence-asr-1907.04916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-attend-spell-and-adapt-speaker-adapted-sequence-to-sequence-asr-1907.04916"/></url>
<url><loc>https://scifaro.com/en/abs/synchronizing-audio-visual-film-stimuli-in-unity-version-5-5-1f1-game-engines-as-a-tool-for-research-1907.04926</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synchronizing-audio-visual-film-stimuli-in-unity-version-5-5-1f1-game-engines-as-a-tool-for-research-1907.04926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synchronizing-audio-visual-film-stimuli-in-unity-version-5-5-1f1-game-engines-as-a-tool-for-research-1907.04926"/></url>
<url><loc>https://scifaro.com/en/abs/speech-bandwidth-extension-with-wavenet-1907.04927</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-bandwidth-extension-with-wavenet-1907.04927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-bandwidth-extension-with-wavenet-1907.04927"/></url>
<url><loc>https://scifaro.com/en/abs/bag-of-audio-words-based-on-autoencoder-codebook-for-continuous-emotion-prediction-1907.04928</loc><lastmod>2019-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bag-of-audio-words-based-on-autoencoder-codebook-for-continuous-emotion-prediction-1907.04928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bag-of-audio-words-based-on-autoencoder-codebook-for-continuous-emotion-prediction-1907.04928"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-and-sound-activity-detection-a-multi-task-approach-1907.05122</loc><lastmod>2019-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-and-sound-activity-detection-a-multi-task-approach-1907.05122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-and-sound-activity-detection-a-multi-task-approach-1907.05122"/></url>
<url><loc>https://scifaro.com/en/abs/effective-incorporation-of-speaker-information-in-utterance-encoding-in-dialog-1907.05599</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-incorporation-of-speaker-information-in-utterance-encoding-in-dialog-1907.05599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-incorporation-of-speaker-information-in-utterance-encoding-in-dialog-1907.05599"/></url>
<url><loc>https://scifaro.com/en/abs/teach-an-all-rounder-with-experts-in-different-domains-1907.05698</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teach-an-all-rounder-with-experts-in-different-domains-1907.05698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teach-an-all-rounder-with-experts-in-different-domains-1907.05698"/></url>
<url><loc>https://scifaro.com/en/abs/a-highly-efficient-distributed-deep-learning-system-for-automatic-speech-recognition-1907.05701</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-highly-efficient-distributed-deep-learning-system-for-automatic-speech-recognition-1907.05701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-highly-efficient-distributed-deep-learning-system-for-automatic-speech-recognition-1907.05701"/></url>
<url><loc>https://scifaro.com/en/abs/deep-auscultation-predicting-respiratory-anomalies-and-diseases-via-recurrent-neural-networks-1907.05708</loc><lastmod>2019-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-auscultation-predicting-respiratory-anomalies-and-diseases-via-recurrent-neural-networks-1907.05708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-auscultation-predicting-respiratory-anomalies-and-diseases-via-recurrent-neural-networks-1907.05708"/></url>
<url><loc>https://scifaro.com/en/abs/voice-pathology-detection-using-deep-learning-a-preliminary-study-1907.05905</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-pathology-detection-using-deep-learning-a-preliminary-study-1907.05905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-pathology-detection-using-deep-learning-a-preliminary-study-1907.05905"/></url>
<url><loc>https://scifaro.com/en/abs/learn-spelling-from-teachers-transferring-knowledge-from-language-models-to-sequence-to-sequence-speech-recognition-1907.06017</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn-spelling-from-teachers-transferring-knowledge-from-language-models-to-sequence-to-sequence-speech-recognition-1907.06017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn-spelling-from-teachers-transferring-knowledge-from-language-models-to-sequence-to-sequence-speech-recognition-1907.06017"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-with-random-digit-strings-using-uncertainty-normalized-hmm-based-i-vectors-1907.06111</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-with-random-digit-strings-using-uncertainty-normalized-hmm-based-i-vectors-1907.06111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-with-random-digit-strings-using-uncertainty-normalized-hmm-based-i-vectors-1907.06111"/></url>
<url><loc>https://scifaro.com/en/abs/but-voices-2019-system-description-1907.06112</loc><lastmod>2019-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-voices-2019-system-description-1907.06112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-voices-2019-system-description-1907.06112"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-the-data-augmentation-scheme-with-various-classifiers-for-acoustic-scene-modeling-1907.06639</loc><lastmod>2019-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-the-data-augmentation-scheme-with-various-classifiers-for-acoustic-scene-modeling-1907.06639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-the-data-augmentation-scheme-with-various-classifiers-for-acoustic-scene-modeling-1907.06639"/></url>
<url><loc>https://scifaro.com/en/abs/towards-adapting-nmf-dictionaries-using-total-variability-modeling-for-noise-robust-acoustic-features-1907.06859</loc><lastmod>2019-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-adapting-nmf-dictionaries-using-total-variability-modeling-for-noise-robust-acoustic-features-1907.06859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-adapting-nmf-dictionaries-using-total-variability-modeling-for-noise-robust-acoustic-features-1907.06859"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-fusion-of-attentive-convolutional-neural-networks-for-dcase2019-challenge-1907.07127</loc><lastmod>2019-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-fusion-of-attentive-convolutional-neural-networks-for-dcase2019-challenge-1907.07127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-fusion-of-attentive-convolutional-neural-networks-for-dcase2019-challenge-1907.07127"/></url>
<url><loc>https://scifaro.com/en/abs/ap19-olr-challenge-three-tasks-and-their-baselines-1907.07626</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ap19-olr-challenge-three-tasks-and-their-baselines-1907.07626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ap19-olr-challenge-three-tasks-and-their-baselines-1907.07626"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-sequence-to-sequence-voice-conversion-with-limited-data-1907.07769</loc><lastmod>2019-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-sequence-to-sequence-voice-conversion-with-limited-data-1907.07769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-sequence-to-sequence-voice-conversion-with-limited-data-1907.07769"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-target-set-reduction-for-end-to-end-speech-recognition-of-hindi-english-code-switching-data-1907.08293</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-target-set-reduction-for-end-to-end-speech-recognition-of-hindi-english-code-switching-data-1907.08293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-target-set-reduction-for-end-to-end-speech-recognition-of-hindi-english-code-switching-data-1907.08293"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-speaker-embedding-using-subjective-inter-speaker-similarity-for-multi-speaker-modeling-in-speech-synthesis-1907.08294</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-speaker-embedding-using-subjective-inter-speaker-similarity-for-multi-speaker-modeling-in-speech-synthesis-1907.08294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-speaker-embedding-using-subjective-inter-speaker-similarity-for-multi-speaker-modeling-in-speech-synthesis-1907.08294"/></url>
<url><loc>https://scifaro.com/en/abs/batch-uniformization-for-minimizing-maximum-anomaly-score-of-dnn-based-anomaly-detection-in-sounds-1907.08338</loc><lastmod>2019-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/batch-uniformization-for-minimizing-maximum-anomaly-score-of-dnn-based-anomaly-detection-in-sounds-1907.08338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/batch-uniformization-for-minimizing-maximum-anomaly-score-of-dnn-based-anomaly-detection-in-sounds-1907.08338"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-voice-conversion-with-quasi-periodic-wavenet-vocoder-1907.08940</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-voice-conversion-with-quasi-periodic-wavenet-vocoder-1907.08940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-voice-conversion-with-quasi-periodic-wavenet-vocoder-1907.08940"/></url>
<url><loc>https://scifaro.com/en/abs/forward-backward-decoding-for-regularizing-end-to-end-tts-1907.09006</loc><lastmod>2019-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/forward-backward-decoding-for-regularizing-end-to-end-tts-1907.09006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/forward-backward-decoding-for-regularizing-end-to-end-tts-1907.09006"/></url>
<url><loc>https://scifaro.com/en/abs/ml-estimation-and-crbs-for-reverberation-speech-and-noise-psds-in-rank-deficient-noise-field-1907.09250</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ml-estimation-and-crbs-for-reverberation-speech-and-noise-psds-in-rank-deficient-noise-field-1907.09250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ml-estimation-and-crbs-for-reverberation-speech-and-noise-psds-in-rank-deficient-noise-field-1907.09250"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-voice-conversion-with-cyclic-variational-autoencoder-1907.10185</loc><lastmod>2019-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-with-cyclic-variational-autoencoder-1907.10185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-with-cyclic-variational-autoencoder-1907.10185"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-based-similarity-measurement-with-spectral-clustering-for-speaker-diarization-1907.10393</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-based-similarity-measurement-with-spectral-clustering-for-speaker-diarization-1907.10393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-based-similarity-measurement-with-spectral-clustering-for-speaker-diarization-1907.10393"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-neural-network-for-short-segment-speaker-recognition-1907.10420</loc><lastmod>2019-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-neural-network-for-short-segment-speaker-recognition-1907.10420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-neural-network-for-short-segment-speaker-recognition-1907.10420"/></url>
<url><loc>https://scifaro.com/en/abs/cross-attention-end-to-end-asr-for-two-party-conversations-1907.10726</loc><lastmod>2019-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-attention-end-to-end-asr-for-two-party-conversations-1907.10726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-attention-end-to-end-asr-for-two-party-conversations-1907.10726"/></url>
<url><loc>https://scifaro.com/en/abs/correlation-distance-skip-connection-denoising-autoencoder-cdsk-dae-for-speech-feature-enhancement-1907.11361</loc><lastmod>2019-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correlation-distance-skip-connection-denoising-autoencoder-cdsk-dae-for-speech-feature-enhancement-1907.11361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correlation-distance-skip-connection-denoising-autoencoder-cdsk-dae-for-speech-feature-enhancement-1907.11361"/></url>
<url><loc>https://scifaro.com/en/abs/localization-uncertainty-in-time-amplitude-stereophonic-reproduction-1907.11425</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-uncertainty-in-time-amplitude-stereophonic-reproduction-1907.11425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-uncertainty-in-time-amplitude-stereophonic-reproduction-1907.11425"/></url>
<url><loc>https://scifaro.com/en/abs/generalization-of-spectrum-differential-based-direct-waveform-modification-for-voice-conversion-1907.11898</loc><lastmod>2019-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalization-of-spectrum-differential-based-direct-waveform-modification-for-voice-conversion-1907.11898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalization-of-spectrum-differential-based-direct-waveform-modification-for-voice-conversion-1907.11898"/></url>
<url><loc>https://scifaro.com/en/abs/mirage-multichannel-database-of-room-impulse-responses-measured-on-high-resolution-cube-shaped-grid-in-multiple-acoustic-conditions-1907.12421</loc><lastmod>2019-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mirage-multichannel-database-of-room-impulse-responses-measured-on-high-resolution-cube-shaped-grid-in-multiple-acoustic-conditions-1907.12421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mirage-multichannel-database-of-room-impulse-responses-measured-on-high-resolution-cube-shaped-grid-in-multiple-acoustic-conditions-1907.12421"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-robust-3-d-sound-source-localization-with-dsvd-phat-1907.12621</loc><lastmod>2019-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-robust-3-d-sound-source-localization-with-dsvd-phat-1907.12621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-robust-3-d-sound-source-localization-with-dsvd-phat-1907.12621"/></url>
<url><loc>https://scifaro.com/en/abs/multi-frame-cross-entropy-training-for-convolutional-neural-networks-in-speech-recognition-1907.13121</loc><lastmod>2019-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-frame-cross-entropy-training-for-convolutional-neural-networks-in-speech-recognition-1907.13121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-frame-cross-entropy-training-for-convolutional-neural-networks-in-speech-recognition-1907.13121"/></url>
<url><loc>https://scifaro.com/en/abs/sound-source-detection-localization-and-classification-using-consecutive-ensemble-of-crnn-models-1908.00766</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-source-detection-localization-and-classification-using-consecutive-ensemble-of-crnn-models-1908.00766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-source-detection-localization-and-classification-using-consecutive-ensemble-of-crnn-models-1908.00766"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-convolutional-time-frequency-channel-squeeze-and-excitation-1908.01399</loc><lastmod>2019-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-convolutional-time-frequency-channel-squeeze-and-excitation-1908.01399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-in-multichannel-audio-using-convolutional-time-frequency-channel-squeeze-and-excitation-1908.01399"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-text-independent-speaker-verification-using-unsupervised-adversarial-discriminative-domain-adaptation-1908.01447</loc><lastmod>2020-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-text-independent-speaker-verification-using-unsupervised-adversarial-discriminative-domain-adaptation-1908.01447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-text-independent-speaker-verification-using-unsupervised-adversarial-discriminative-domain-adaptation-1908.01447"/></url>
<url><loc>https://scifaro.com/en/abs/probabilistic-permutation-invariant-training-for-speech-separation-1908.01768</loc><lastmod>2019-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probabilistic-permutation-invariant-training-for-speech-separation-1908.01768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probabilistic-permutation-invariant-training-for-speech-separation-1908.01768"/></url>
<url><loc>https://scifaro.com/en/abs/practical-speech-recognition-with-htk-1908.02119</loc><lastmod>2019-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/practical-speech-recognition-with-htk-1908.02119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/practical-speech-recognition-with-htk-1908.02119"/></url>
<url><loc>https://scifaro.com/en/abs/triplet-based-embedding-distance-and-similarity-learning-for-text-independent-speaker-verification-1908.02283</loc><lastmod>2019-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/triplet-based-embedding-distance-and-similarity-learning-for-text-independent-speaker-verification-1908.02283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/triplet-based-embedding-distance-and-similarity-learning-for-text-independent-speaker-verification-1908.02283"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-text-independent-speaker-verification-framework-with-a-keyword-adversarial-network-1908.02612</loc><lastmod>2019-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-text-independent-speaker-verification-framework-with-a-keyword-adversarial-network-1908.02612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-text-independent-speaker-verification-framework-with-a-keyword-adversarial-network-1908.02612"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-likelihood-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1908.02710</loc><lastmod>2019-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-likelihood-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1908.02710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-likelihood-convolutional-beamformer-for-simultaneous-denoising-and-dereverberation-1908.02710"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-attention-model-for-weakly-labeled-audio-event-classification-1908.02876</loc><lastmod>2019-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-attention-model-for-weakly-labeled-audio-event-classification-1908.02876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-attention-model-for-weakly-labeled-audio-event-classification-1908.02876"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-synchronous-single-frequency-filtering-spectrogram-for-speech-emotion-recognition-1908.03054</loc><lastmod>2019-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-synchronous-single-frequency-filtering-spectrogram-for-speech-emotion-recognition-1908.03054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-synchronous-single-frequency-filtering-spectrogram-for-speech-emotion-recognition-1908.03054"/></url>
<url><loc>https://scifaro.com/en/abs/toyadmos-a-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-1908.03299</loc><lastmod>2019-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toyadmos-a-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-1908.03299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toyadmos-a-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-1908.03299"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-cross-lingual-speaker-and-phonetic-diversity-for-unsupervised-subword-modeling-1908.03538</loc><lastmod>2019-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-cross-lingual-speaker-and-phonetic-diversity-for-unsupervised-subword-modeling-1908.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-cross-lingual-speaker-and-phonetic-diversity-for-unsupervised-subword-modeling-1908.03538"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-dependent-facial-animation-from-affective-speech-1908.03904</loc><lastmod>2019-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-dependent-facial-animation-from-affective-speech-1908.03904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-dependent-facial-animation-from-affective-speech-1908.03904"/></url>
<url><loc>https://scifaro.com/en/abs/personal-vad-speaker-conditioned-voice-activity-detection-1908.04284</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personal-vad-speaker-conditioned-voice-activity-detection-1908.04284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personal-vad-speaker-conditioned-voice-activity-detection-1908.04284"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-mitigating-the-impact-of-acoustic-environments-on-machine-to-machine-signalling-1908.04672</loc><lastmod>2019-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-mitigating-the-impact-of-acoustic-environments-on-machine-to-machine-signalling-1908.04672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-mitigating-the-impact-of-acoustic-environments-on-machine-to-machine-signalling-1908.04672"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-using-speaker-embeddings-and-transfer-learning-1908.04737</loc><lastmod>2019-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-using-speaker-embeddings-and-transfer-learning-1908.04737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-using-speaker-embeddings-and-transfer-learning-1908.04737"/></url>
<url><loc>https://scifaro.com/en/abs/rtf-steered-binaural-mvdr-beamforming-incorporating-multiple-external-microphones-1908.04848</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rtf-steered-binaural-mvdr-beamforming-incorporating-multiple-external-microphones-1908.04848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rtf-steered-binaural-mvdr-beamforming-incorporating-multiple-external-microphones-1908.04848"/></url>
<url><loc>https://scifaro.com/en/abs/components-loss-for-neural-networks-in-mask-based-speech-enhancement-1908.05087</loc><lastmod>2019-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/components-loss-for-neural-networks-in-mask-based-speech-enhancement-1908.05087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/components-loss-for-neural-networks-in-mask-based-speech-enhancement-1908.05087"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-semi-supervised-training-through-a-dropout-regularization-in-end-to-end-speech-recognition-1908.05227</loc><lastmod>2019-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-semi-supervised-training-through-a-dropout-regularization-in-end-to-end-speech-recognition-1908.05227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-semi-supervised-training-through-a-dropout-regularization-in-end-to-end-speech-recognition-1908.05227"/></url>
<url><loc>https://scifaro.com/en/abs/state-of-the-art-speech-recognition-using-eeg-and-towards-decoding-of-speech-spectrum-from-eeg-1908.05743</loc><lastmod>2020-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/state-of-the-art-speech-recognition-using-eeg-and-towards-decoding-of-speech-spectrum-from-eeg-1908.05743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/state-of-the-art-speech-recognition-using-eeg-and-towards-decoding-of-speech-spectrum-from-eeg-1908.05743"/></url>
<url><loc>https://scifaro.com/en/abs/two-staged-acoustic-modeling-adaption-for-robust-speech-recognition-by-the-example-of-german-oral-history-interviews-1908.06709</loc><lastmod>2019-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-staged-acoustic-modeling-adaption-for-robust-speech-recognition-by-the-example-of-german-oral-history-interviews-1908.06709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-staged-acoustic-modeling-adaption-for-robust-speech-recognition-by-the-example-of-german-oral-history-interviews-1908.06709"/></url>
<url><loc>https://scifaro.com/en/abs/salient-speech-representations-based-on-cloned-networks-1908.07045</loc><lastmod>2019-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salient-speech-representations-based-on-cloned-networks-1908.07045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salient-speech-representations-based-on-cloned-networks-1908.07045"/></url>
<url><loc>https://scifaro.com/en/abs/vop-detection-for-read-and-conversation-speech-using-cwt-coefficients-and-phone-boundaries-1908.08668</loc><lastmod>2019-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vop-detection-for-read-and-conversation-speech-using-cwt-coefficients-and-phone-boundaries-1908.08668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vop-detection-for-read-and-conversation-speech-using-cwt-coefficients-and-phone-boundaries-1908.08668"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-binarization-on-recurrent-neural-networks-for-single-channel-source-separation-1908.08898</loc><lastmod>2019-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-binarization-on-recurrent-neural-networks-for-single-channel-source-separation-1908.08898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-binarization-on-recurrent-neural-networks-for-single-channel-source-separation-1908.08898"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-and-multimode-phone-recognition-system-for-indian-languages-1908.09634</loc><lastmod>2019-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-and-multimode-phone-recognition-system-for-indian-languages-1908.09634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-and-multimode-phone-recognition-system-for-indian-languages-1908.09634"/></url>
<url><loc>https://scifaro.com/en/abs/connecting-and-comparing-language-model-interpolation-techniques-1908.09738</loc><lastmod>2019-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/connecting-and-comparing-language-model-interpolation-techniques-1908.09738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/connecting-and-comparing-language-model-interpolation-techniques-1908.09738"/></url>
<url><loc>https://scifaro.com/en/abs/nearest-neighbor-search-based-bitwise-source-separation-using-discriminant-winner-take-all-hashing-1908.09799</loc><lastmod>2019-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nearest-neighbor-search-based-bitwise-source-separation-using-discriminant-winner-take-all-hashing-1908.09799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nearest-neighbor-search-based-bitwise-source-separation-using-discriminant-winner-take-all-hashing-1908.09799"/></url>
<url><loc>https://scifaro.com/en/abs/vae-based-domain-adaptation-for-speaker-verification-1908.10092</loc><lastmod>2019-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vae-based-domain-adaptation-for-speaker-verification-1908.10092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vae-based-domain-adaptation-for-speaker-verification-1908.10092"/></url>
<url><loc>https://scifaro.com/en/abs/neural-harmonic-plus-noise-waveform-model-with-trainable-maximum-voice-frequency-for-text-to-speech-synthesis-1908.10256</loc><lastmod>2019-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-harmonic-plus-noise-waveform-model-with-trainable-maximum-voice-frequency-for-text-to-speech-synthesis-1908.10256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-harmonic-plus-noise-waveform-model-with-trainable-maximum-voice-frequency-for-text-to-speech-synthesis-1908.10256"/></url>
<url><loc>https://scifaro.com/en/abs/initial-investigation-of-an-encoder-decoder-end-to-end-tts-framework-using-marginalization-of-monotonic-hard-latent-alignments-1908.11535</loc><lastmod>2019-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/initial-investigation-of-an-encoder-decoder-end-to-end-tts-framework-using-marginalization-of-monotonic-hard-latent-alignments-1908.11535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/initial-investigation-of-an-encoder-decoder-end-to-end-tts-framework-using-marginalization-of-monotonic-hard-latent-alignments-1908.11535"/></url>
<url><loc>https://scifaro.com/en/abs/enhancements-for-audio-only-diarization-systems-1909.00082</loc><lastmod>2019-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancements-for-audio-only-diarization-systems-1909.00082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancements-for-audio-only-diarization-systems-1909.00082"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-long-range-context-for-concurrent-dialogue-acts-recognition-1909.00521</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-long-range-context-for-concurrent-dialogue-acts-recognition-1909.00521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-long-range-context-for-concurrent-dialogue-acts-recognition-1909.00521"/></url>
<url><loc>https://scifaro.com/en/abs/the-locata-challenge-acoustic-source-localization-and-tracking-1909.01008</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-locata-challenge-acoustic-source-localization-and-tracking-1909.01008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-locata-challenge-acoustic-source-localization-and-tracking-1909.01008"/></url>
<url><loc>https://scifaro.com/en/abs/maximizing-mutual-information-for-tacotron-1909.01145</loc><lastmod>2019-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximizing-mutual-information-for-tacotron-1909.01145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximizing-mutual-information-for-tacotron-1909.01145"/></url>
<url><loc>https://scifaro.com/en/abs/bandwidth-embeddings-for-mixed-bandwidth-speech-recognition-1909.02667</loc><lastmod>2019-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandwidth-embeddings-for-mixed-bandwidth-speech-recognition-1909.02667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandwidth-embeddings-for-mixed-bandwidth-speech-recognition-1909.02667"/></url>
<url><loc>https://scifaro.com/en/abs/avaya-conversational-intelligence-a-real-time-system-for-spoken-language-understanding-in-human-human-call-center-conversations-1909.02851</loc><lastmod>2019-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avaya-conversational-intelligence-a-real-time-system-for-spoken-language-understanding-in-human-human-call-center-conversations-1909.02851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avaya-conversational-intelligence-a-real-time-system-for-spoken-language-understanding-in-human-human-call-center-conversations-1909.02851"/></url>
<url><loc>https://scifaro.com/en/abs/receptive-field-regularized-cnn-variants-for-acoustic-scene-classification-1909.02859</loc><lastmod>2019-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/receptive-field-regularized-cnn-variants-for-acoustic-scene-classification-1909.02859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/receptive-field-regularized-cnn-variants-for-acoustic-scene-classification-1909.02859"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-parallel-audio-recordings-to-enforce-device-invariance-in-cnn-based-acoustic-scene-classification-1909.02869</loc><lastmod>2019-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-parallel-audio-recordings-to-enforce-device-invariance-in-cnn-based-acoustic-scene-classification-1909.02869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-parallel-audio-recordings-to-enforce-device-invariance-in-cnn-based-acoustic-scene-classification-1909.02869"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-long-form-text-to-speech-comparing-the-ratings-of-sentences-and-paragraphs-1909.03965</loc><lastmod>2019-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-long-form-text-to-speech-comparing-the-ratings-of-sentences-and-paragraphs-1909.03965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-long-form-text-to-speech-comparing-the-ratings-of-sentences-and-paragraphs-1909.03965"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-cross-lingual-voice-conversion-using-bottleneck-features-1909.03974</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-cross-lingual-voice-conversion-using-bottleneck-features-1909.03974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-cross-lingual-voice-conversion-using-bottleneck-features-1909.03974"/></url>
<url><loc>https://scifaro.com/en/abs/self-teaching-networks-1909.04157</loc><lastmod>2019-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-teaching-networks-1909.04157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-teaching-networks-1909.04157"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-domain-variant-of-velvet-noise-and-its-application-to-acoustic-measurements-1909.04301</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-domain-variant-of-velvet-noise-and-its-application-to-acoustic-measurements-1909.04301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-domain-variant-of-velvet-noise-and-its-application-to-acoustic-measurements-1909.04301"/></url>
<url><loc>https://scifaro.com/en/abs/generative-speech-enhancement-based-on-cloned-networks-1909.04776</loc><lastmod>2019-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-speech-enhancement-based-on-cloned-networks-1909.04776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-speech-enhancement-based-on-cloned-networks-1909.04776"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-multilingual-speech-recognition-with-a-streaming-end-to-end-model-1909.05330</loc><lastmod>2019-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-multilingual-speech-recognition-with-a-streaming-end-to-end-model-1909.05330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-multilingual-speech-recognition-with-a-streaming-end-to-end-model-1909.05330"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-and-correlating-rhythm-formants-in-speech-1909.05639</loc><lastmod>2019-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-and-correlating-rhythm-formants-in-speech-1909.05639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-and-correlating-rhythm-formants-in-speech-1909.05639"/></url>
<url><loc>https://scifaro.com/en/abs/sams-net-a-sliced-attention-based-neural-network-for-music-source-separation-1909.05746</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sams-net-a-sliced-attention-based-neural-network-for-music-source-separation-1909.05746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sams-net-a-sliced-attention-based-neural-network-for-music-source-separation-1909.05746"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-permutation-free-objectives-1909.05952</loc><lastmod>2019-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-permutation-free-objectives-1909.05952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-permutation-free-objectives-1909.05952"/></url>
<url><loc>https://scifaro.com/en/abs/guided-learning-convolution-system-for-dcase-2019-task-4-1909.06178</loc><lastmod>2019-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-learning-convolution-system-for-dcase-2019-task-4-1909.06178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-learning-convolution-system-for-dcase-2019-task-4-1909.06178"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-self-attention-1909.06247</loc><lastmod>2019-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-self-attention-1909.06247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-neural-speaker-diarization-with-self-attention-1909.06247"/></url>
<url><loc>https://scifaro.com/en/abs/probing-the-information-encoded-in-x-vectors-1909.06351</loc><lastmod>2020-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-the-information-encoded-in-x-vectors-1909.06351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-the-information-encoded-in-x-vectors-1909.06351"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-graphemic-hybrid-asr-with-massive-data-augmentation-1909.06522</loc><lastmod>2020-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-graphemic-hybrid-asr-with-massive-data-augmentation-1909.06522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-graphemic-hybrid-asr-with-massive-data-augmentation-1909.06522"/></url>
<url><loc>https://scifaro.com/en/abs/bootstrapping-non-parallel-voice-conversion-from-speaker-adaptive-text-to-speech-1909.06532</loc><lastmod>2019-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bootstrapping-non-parallel-voice-conversion-from-speaker-adaptive-text-to-speech-1909.06532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bootstrapping-non-parallel-voice-conversion-from-speaker-adaptive-text-to-speech-1909.06532"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-source-channel-and-attention-based-sequence-to-sequence-models-for-speech-recognition-1909.06614</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-source-channel-and-attention-based-sequence-to-sequence-models-for-speech-recognition-1909.06614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-source-channel-and-attention-based-sequence-to-sequence-models-for-speech-recognition-1909.06614"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-into-on-device-personalization-of-end-to-end-automatic-speech-recognition-models-1909.06678</loc><lastmod>2019-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-into-on-device-personalization-of-end-to-end-automatic-speech-recognition-models-1909.06678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-into-on-device-personalization-of-end-to-end-automatic-speech-recognition-models-1909.06678"/></url>
<url><loc>https://scifaro.com/en/abs/many-to-many-voice-conversion-using-cycle-consistent-variational-autoencoder-with-multiple-decoders-1909.06805</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-using-cycle-consistent-variational-autoencoder-with-multiple-decoders-1909.06805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-using-cycle-consistent-variational-autoencoder-with-multiple-decoders-1909.06805"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-separation-and-dereverberation-with-a-two-stage-multimodal-network-1909.07352</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-separation-and-dereverberation-with-a-two-stage-multimodal-network-1909.07352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-separation-and-dereverberation-with-a-two-stage-multimodal-network-1909.07352"/></url>
<url><loc>https://scifaro.com/en/abs/black-box-attacks-on-automatic-speaker-verification-using-feedback-controlled-voice-conversion-1909.07655</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/black-box-attacks-on-automatic-speaker-verification-using-feedback-controlled-voice-conversion-1909.07655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/black-box-attacks-on-automatic-speaker-verification-using-feedback-controlled-voice-conversion-1909.07655"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-strategies-for-likelihood-ratio-computation-in-forensic-voice-comparison-with-automatic-systems-1909.08315</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-strategies-for-likelihood-ratio-computation-in-forensic-voice-comparison-with-automatic-systems-1909.08315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-strategies-for-likelihood-ratio-computation-in-forensic-voice-comparison-with-automatic-systems-1909.08315"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-filtering-at-the-edge-1909.08500</loc><lastmod>2019-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-filtering-at-the-edge-1909.08500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-filtering-at-the-edge-1909.08500"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-analysis-with-multi-head-attention-networks-1909.08961</loc><lastmod>2019-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-analysis-with-multi-head-attention-networks-1909.08961"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-analysis-with-multi-head-attention-networks-1909.08961"/></url>
<url><loc>https://scifaro.com/en/abs/wenets-a-convolutional-framework-for-evaluating-audio-waveforms-1909.09024</loc><lastmod>2019-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wenets-a-convolutional-framework-for-evaluating-audio-waveforms-1909.09024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wenets-a-convolutional-framework-for-evaluating-audio-waveforms-1909.09024"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-speech-enhancement-using-eeg-1909.09132</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-speech-enhancement-using-eeg-1909.09132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-speech-enhancement-using-eeg-1909.09132"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-lyrics-alignment-and-transcription-in-polyphonic-music-does-background-music-help-1909.10200</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-lyrics-alignment-and-transcription-in-polyphonic-music-does-background-music-help-1909.10200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-lyrics-alignment-and-transcription-in-polyphonic-music-does-background-music-help-1909.10200"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-neural-speech-synthesis-with-prosody-modification-capabilities-1909.10302</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-neural-speech-synthesis-with-prosody-modification-capabilities-1909.10302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-neural-speech-synthesis-with-prosody-modification-capabilities-1909.10302"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-semantics-from-speech-through-pre-training-1909.10924</loc><lastmod>2019-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-semantics-from-speech-through-pre-training-1909.10924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-semantics-from-speech-through-pre-training-1909.10924"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noise-robustness-in-speaker-identification-using-a-two-stage-attention-model-1909.11200</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noise-robustness-in-speaker-identification-using-a-two-stage-attention-model-1909.11200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noise-robustness-in-speaker-identification-using-a-two-stage-attention-model-1909.11200"/></url>
<url><loc>https://scifaro.com/en/abs/mpeg-h-audio-for-improving-accessibility-in-broadcasting-and-streaming-1909.11549</loc><lastmod>2019-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mpeg-h-audio-for-improving-accessibility-in-broadcasting-and-streaming-1909.11549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mpeg-h-audio-for-improving-accessibility-in-broadcasting-and-streaming-1909.11549"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-speech-and-non-speech-components-for-building-robust-acoustic-models-from-found-data-1909.11727</loc><lastmod>2019-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-speech-and-non-speech-components-for-building-robust-acoustic-models-from-found-data-1909.11727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-speech-and-non-speech-components-for-building-robust-acoustic-models-from-found-data-1909.11727"/></url>
<url><loc>https://scifaro.com/en/abs/self-adaptive-soft-voice-activity-detection-using-deep-neural-networks-for-robust-speaker-verification-1909.11886</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-adaptive-soft-voice-activity-detection-using-deep-neural-networks-for-robust-speaker-verification-1909.11886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-adaptive-soft-voice-activity-detection-using-deep-neural-networks-for-robust-speaker-verification-1909.11886"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-transducers-for-end-to-end-speech-recognition-1909.13037</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-transducers-for-end-to-end-speech-recognition-1909.13037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-transducers-for-end-to-end-speech-recognition-1909.13037"/></url>
<url><loc>https://scifaro.com/en/abs/fasnet-low-latency-adaptive-beamforming-for-multi-microphone-audio-processing-1909.13387</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fasnet-low-latency-adaptive-beamforming-for-multi-microphone-audio-processing-1909.13387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fasnet-low-latency-adaptive-beamforming-for-multi-microphone-audio-processing-1909.13387"/></url>
<url><loc>https://scifaro.com/en/abs/dipco-dinner-party-corpus-1909.13447</loc><lastmod>2019-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dipco-dinner-party-corpus-1909.13447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dipco-dinner-party-corpus-1909.13447"/></url>
<url><loc>https://scifaro.com/en/abs/non-native-speaker-verification-for-spoken-language-assessment-1909.13695</loc><lastmod>2019-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-native-speaker-verification-for-spoken-language-assessment-1909.13695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-native-speaker-verification-for-spoken-language-assessment-1909.13695"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-model-adaptation-from-raw-waveforms-with-sincnet-1909.13759</loc><lastmod>2019-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-model-adaptation-from-raw-waveforms-with-sincnet-1909.13759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-model-adaptation-from-raw-waveforms-with-sincnet-1909.13759"/></url>
<url><loc>https://scifaro.com/en/abs/additional-shared-decoder-on-siamese-multi-view-encoders-for-learning-acoustic-word-embeddings-1910.00341</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/additional-shared-decoder-on-siamese-multi-view-encoders-for-learning-acoustic-word-embeddings-1910.00341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/additional-shared-decoder-on-siamese-multi-view-encoders-for-learning-acoustic-word-embeddings-1910.00341"/></url>
<url><loc>https://scifaro.com/en/abs/a-modularized-neural-network-with-language-specific-output-layers-for-cross-lingual-voice-conversion-1910.00496</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-modularized-neural-network-with-language-specific-output-layers-for-cross-lingual-voice-conversion-1910.00496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-modularized-neural-network-with-language-specific-output-layers-for-cross-lingual-voice-conversion-1910.00496"/></url>
<url><loc>https://scifaro.com/en/abs/domain-expansion-in-dnn-based-acoustic-models-for-robust-speech-recognition-1910.00565</loc><lastmod>2019-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-expansion-in-dnn-based-acoustic-models-for-robust-speech-recognition-1910.00565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-expansion-in-dnn-based-acoustic-models-for-robust-speech-recognition-1910.00565"/></url>
<url><loc>https://scifaro.com/en/abs/from-senones-to-chenones-tied-context-dependent-graphemes-for-hybrid-speech-recognition-1910.01493</loc><lastmod>2019-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-senones-to-chenones-tied-context-dependent-graphemes-for-hybrid-speech-recognition-1910.01493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-senones-to-chenones-tied-context-dependent-graphemes-for-hybrid-speech-recognition-1910.01493"/></url>
<url><loc>https://scifaro.com/en/abs/objective-human-affective-vocal-expression-detection-and-automatic-classification-with-stochastic-models-and-learning-systems-1910.01967</loc><lastmod>2019-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-human-affective-vocal-expression-detection-and-automatic-classification-with-stochastic-models-and-learning-systems-1910.01967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-human-affective-vocal-expression-detection-and-automatic-classification-with-stochastic-models-and-learning-systems-1910.01967"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-zero-resource-domain-adaptation-1910.02168</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-zero-resource-domain-adaptation-1910.02168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-zero-resource-domain-adaptation-1910.02168"/></url>
<url><loc>https://scifaro.com/en/abs/effective-acoustic-energy-sensing-exploitation-for-target-sources-localization-in-urban-acoustic-scenes-1910.02709</loc><lastmod>2019-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-acoustic-energy-sensing-exploitation-for-target-sources-localization-in-urban-acoustic-scenes-1910.02709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-acoustic-energy-sensing-exploitation-for-target-sources-localization-in-urban-acoustic-scenes-1910.02709"/></url>
<url><loc>https://scifaro.com/en/abs/impulsive-noise-detection-for-intelligibility-and-quality-improvement-of-speech-enhancement-methods-applied-in-time-domain-1910.02710</loc><lastmod>2019-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impulsive-noise-detection-for-intelligibility-and-quality-improvement-of-speech-enhancement-methods-applied-in-time-domain-1910.02710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impulsive-noise-detection-for-intelligibility-and-quality-improvement-of-speech-enhancement-methods-applied-in-time-domain-1910.02710"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-reverberation-absorption-using-non-stationary-masking-components-detection-for-intelligibility-improvement-1910.02712</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-reverberation-absorption-using-non-stationary-masking-components-detection-for-intelligibility-improvement-1910.02712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-reverberation-absorption-using-non-stationary-masking-components-detection-for-intelligibility-improvement-1910.02712"/></url>
<url><loc>https://scifaro.com/en/abs/melgan-vc-voice-conversion-and-audio-style-transfer-on-arbitrarily-long-samples-using-spectrograms-1910.03713</loc><lastmod>2019-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melgan-vc-voice-conversion-and-audio-style-transfer-on-arbitrarily-long-samples-using-spectrograms-1910.03713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melgan-vc-voice-conversion-and-audio-style-transfer-on-arbitrarily-long-samples-using-spectrograms-1910.03713"/></url>
<url><loc>https://scifaro.com/en/abs/first-order-ambisonics-domain-spatial-augmentation-for-dnn-based-direction-of-arrival-estimation-1910.04388</loc><lastmod>2019-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/first-order-ambisonics-domain-spatial-augmentation-for-dnn-based-direction-of-arrival-estimation-1910.04388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/first-order-ambisonics-domain-spatial-augmentation-for-dnn-based-direction-of-arrival-estimation-1910.04388"/></url>
<url><loc>https://scifaro.com/en/abs/doa-estimation-by-dnn-based-denoising-and-dereverberation-from-sound-intensity-vector-1910.04415</loc><lastmod>2019-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/doa-estimation-by-dnn-based-denoising-and-dereverberation-from-sound-intensity-vector-1910.04415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/doa-estimation-by-dnn-based-denoising-and-dereverberation-from-sound-intensity-vector-1910.04415"/></url>
<url><loc>https://scifaro.com/en/abs/identifying-mood-episodes-using-dialogue-features-from-clinical-interviews-1910.05115</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identifying-mood-episodes-using-dialogue-features-from-clinical-interviews-1910.05115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identifying-mood-episodes-using-dialogue-features-from-clinical-interviews-1910.05115"/></url>
<url><loc>https://scifaro.com/en/abs/the-theory-behind-controllable-expressive-speech-synthesis-a-cross-disciplinary-approach-1910.06234</loc><lastmod>2019-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-theory-behind-controllable-expressive-speech-synthesis-a-cross-disciplinary-approach-1910.06234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-theory-behind-controllable-expressive-speech-synthesis-a-cross-disciplinary-approach-1910.06234"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-rnn-efficient-long-sequence-modeling-for-time-domain-single-channel-speech-separation-1910.06379</loc><lastmod>2020-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-rnn-efficient-long-sequence-modeling-for-time-domain-single-channel-speech-separation-1910.06379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-rnn-efficient-long-sequence-modeling-for-time-domain-single-channel-speech-separation-1910.06379"/></url>
<url><loc>https://scifaro.com/en/abs/mimo-speech-end-to-end-multi-channel-multi-speaker-speech-recognition-1910.06522</loc><lastmod>2019-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimo-speech-end-to-end-multi-channel-multi-speaker-speech-recognition-1910.06522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimo-speech-end-to-end-multi-channel-multi-speaker-speech-recognition-1910.06522"/></url>
<url><loc>https://scifaro.com/en/abs/melgan-generative-adversarial-networks-for-conditional-waveform-synthesis-1910.06711</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/melgan-generative-adversarial-networks-for-conditional-waveform-synthesis-1910.06711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/melgan-generative-adversarial-networks-for-conditional-waveform-synthesis-1910.06711"/></url>
<url><loc>https://scifaro.com/en/abs/t-gsa-transformer-with-gaussian-weighted-self-attention-for-speech-enhancement-1910.06762</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-gsa-transformer-with-gaussian-weighted-self-attention-for-speech-enhancement-1910.06762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-gsa-transformer-with-gaussian-weighted-self-attention-for-speech-enhancement-1910.06762"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-large-receptive-field-convolutional-networks-for-distant-speech-recognition-1910.07047</loc><lastmod>2019-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-large-receptive-field-convolutional-networks-for-distant-speech-recognition-1910.07047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-large-receptive-field-convolutional-networks-for-distant-speech-recognition-1910.07047"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-asr-with-contextual-block-processing-1910.07204</loc><lastmod>2019-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-asr-with-contextual-block-processing-1910.07204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-asr-with-contextual-block-processing-1910.07204"/></url>
<url><loc>https://scifaro.com/en/abs/multi-talker-mvdr-beamforming-based-on-extended-complex-gaussian-mixture-model-1910.07753</loc><lastmod>2019-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-talker-mvdr-beamforming-based-on-extended-complex-gaussian-mixture-model-1910.07753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-talker-mvdr-beamforming-based-on-extended-complex-gaussian-mixture-model-1910.07753"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-the-robust-evaluation-of-sound-event-detection-1910.08440</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-the-robust-evaluation-of-sound-event-detection-1910.08440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-the-robust-evaluation-of-sound-event-detection-1910.08440"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-time-varying-covariance-matrix-model-for-late-reverberation-reduction-1910.08710</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-time-varying-covariance-matrix-model-for-late-reverberation-reduction-1910.08710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-time-varying-covariance-matrix-model-for-late-reverberation-reduction-1910.08710"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-on-spoofing-countermeasures-of-automatic-speaker-verification-1910.08716</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-on-spoofing-countermeasures-of-automatic-speaker-verification-1910.08716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-on-spoofing-countermeasures-of-automatic-speaker-verification-1910.08716"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-sliding-generalized-cross-correlation-a-sub-band-time-delay-estimation-approach-1910.08838</loc><lastmod>2020-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-sliding-generalized-cross-correlation-a-sub-band-time-delay-estimation-approach-1910.08838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-sliding-generalized-cross-correlation-a-sub-band-time-delay-estimation-approach-1910.08838"/></url>
<url><loc>https://scifaro.com/en/abs/but-system-description-for-dihard-speech-diarization-challenge-2019-1910.08847</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-system-description-for-dihard-speech-diarization-challenge-2019-1910.08847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-system-description-for-dihard-speech-diarization-challenge-2019-1910.08847"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-with-dual-sequence-lstm-architecture-1910.08874</loc><lastmod>2020-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-dual-sequence-lstm-architecture-1910.08874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-dual-sequence-lstm-architecture-1910.08874"/></url>
<url><loc>https://scifaro.com/en/abs/using-speech-synthesis-to-train-end-to-end-spoken-language-understanding-models-1910.09463</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-speech-synthesis-to-train-end-to-end-spoken-language-understanding-models-1910.09463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-speech-synthesis-to-train-end-to-end-spoken-language-understanding-models-1910.09463"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-individual-hrtfs-based-on-spatial-principal-component-analysis-1910.09484</loc><lastmod>2020-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-individual-hrtfs-based-on-spatial-principal-component-analysis-1910.09484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-individual-hrtfs-based-on-spatial-principal-component-analysis-1910.09484"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-study-between-adversarial-networks-and-classical-techniques-for-speech-enhancement-1910.09522</loc><lastmod>2019-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-study-between-adversarial-networks-and-classical-techniques-for-speech-enhancement-1910.09522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-study-between-adversarial-networks-and-classical-techniques-for-speech-enhancement-1910.09522"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-neural-clustering-for-speaker-diarisation-1910.09703</loc><lastmod>2020-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-neural-clustering-for-speaker-diarisation-1910.09703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-neural-clustering-for-speaker-diarisation-1910.09703"/></url>
<url><loc>https://scifaro.com/en/abs/joint-spatial-filter-and-time-varying-mclp-for-dereverberation-and-interference-suppression-of-a-dynamic-static-speech-source-1910.09782</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-spatial-filter-and-time-varying-mclp-for-dereverberation-and-interference-suppression-of-a-dynamic-static-speech-source-1910.09782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-spatial-filter-and-time-varying-mclp-for-dereverberation-and-interference-suppression-of-a-dynamic-static-speech-source-1910.09782"/></url>
<url><loc>https://scifaro.com/en/abs/spiking-neural-networks-trained-with-backpropagation-for-low-power-neuromorphic-implementation-of-voice-activity-detection-1910.09993</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spiking-neural-networks-trained-with-backpropagation-for-low-power-neuromorphic-implementation-of-voice-activity-detection-1910.09993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spiking-neural-networks-trained-with-backpropagation-for-low-power-neuromorphic-implementation-of-voice-activity-detection-1910.09993"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-using-crnn-on-pairs-of-microphones-1910.10049</loc><lastmod>2019-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-crnn-on-pairs-of-microphones-1910.10049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-crnn-on-pairs-of-microphones-1910.10049"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-plate-and-spring-reverberation-using-a-dsp-informed-deep-neural-network-1910.10105</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-plate-and-spring-reverberation-using-a-dsp-informed-deep-neural-network-1910.10105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-plate-and-spring-reverberation-using-a-dsp-informed-deep-neural-network-1910.10105"/></url>
<url><loc>https://scifaro.com/en/abs/gci-detection-from-raw-speech-using-a-fully-convolutional-network-1910.10235</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gci-detection-from-raw-speech-using-a-fully-convolutional-network-1910.10235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gci-detection-from-raw-speech-using-a-fully-convolutional-network-1910.10235"/></url>
<url><loc>https://scifaro.com/en/abs/quartznet-deep-automatic-speech-recognition-with-1d-time-channel-separable-convolutions-1910.10261</loc><lastmod>2019-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quartznet-deep-automatic-speech-recognition-with-1d-time-channel-separable-convolutions-1910.10261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quartznet-deep-automatic-speech-recognition-with-1d-time-channel-separable-convolutions-1910.10261"/></url>
<url><loc>https://scifaro.com/en/abs/a-transformer-with-interleaved-self-attention-and-convolution-for-hybrid-acoustic-models-1910.10352</loc><lastmod>2019-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transformer-with-interleaved-self-attention-and-convolution-for-hybrid-acoustic-models-1910.10352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transformer-with-interleaved-self-attention-and-convolution-for-hybrid-acoustic-models-1910.10352"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-architectures-for-asr-free-spoken-language-understanding-1910.10599</loc><lastmod>2020-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-architectures-for-asr-free-spoken-language-understanding-1910.10599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-architectures-for-asr-free-spoken-language-understanding-1910.10599"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-domain-adversarial-voice-activity-detection-1910.10655</loc><lastmod>2020-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-domain-adversarial-voice-activity-detection-1910.10655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-domain-adversarial-voice-activity-detection-1910.10655"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-multi-speaker-text-to-speech-with-state-of-the-art-neural-speaker-embeddings-1910.10838</loc><lastmod>2020-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-multi-speaker-text-to-speech-with-state-of-the-art-neural-speaker-embeddings-1910.10838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-multi-speaker-text-to-speech-with-state-of-the-art-neural-speaker-embeddings-1910.10838"/></url>
<url><loc>https://scifaro.com/en/abs/learning-deep-representations-by-multilayer-bootstrap-networks-for-speaker-diarization-1910.10969</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-deep-representations-by-multilayer-bootstrap-networks-for-speaker-diarization-1910.10969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-deep-representations-by-multilayer-bootstrap-networks-for-speaker-diarization-1910.10969"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-the-impact-of-speaker-localization-errors-on-speech-separation-for-automatic-speech-recognition-1910.11114</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-the-impact-of-speaker-localization-errors-on-speech-separation-for-automatic-speech-recognition-1910.11114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-the-impact-of-speaker-localization-errors-on-speech-separation-for-automatic-speech-recognition-1910.11114"/></url>
<url><loc>https://scifaro.com/en/abs/slogd-speaker-location-guided-deflation-approach-to-speech-separation-1910.11131</loc><lastmod>2019-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slogd-speaker-location-guided-deflation-approach-to-speech-separation-1910.11131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slogd-speaker-location-guided-deflation-approach-to-speech-separation-1910.11131"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-using-latent-space-clustering-in-generative-adversarial-network-1910.11398</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-using-latent-space-clustering-in-generative-adversarial-network-1910.11398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-using-latent-space-clustering-in-generative-adversarial-network-1910.11398"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-for-robust-child-adult-classification-from-speech-1910.11400</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-for-robust-child-adult-classification-from-speech-1910.11400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-for-robust-child-adult-classification-from-speech-1910.11400"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-semi-supervised-speaker-diarization-system-using-gan-mixture-model-1910.11416</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-semi-supervised-speaker-diarization-system-using-gan-mixture-model-1910.11416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-semi-supervised-speaker-diarization-system-using-gan-mixture-model-1910.11416"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-long-form-speech-using-streaming-end-to-end-models-1910.11455</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-long-form-speech-using-streaming-end-to-end-models-1910.11455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-long-form-speech-using-streaming-end-to-end-models-1910.11455"/></url>
<url><loc>https://scifaro.com/en/abs/learning-domain-invariant-representations-for-child-adult-classification-from-speech-1910.11472</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-domain-invariant-representations-for-child-adult-classification-from-speech-1910.11472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-domain-invariant-representations-for-child-adult-classification-from-speech-1910.11472"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-wavegan-a-fast-waveform-generation-model-based-on-generative-adversarial-networks-with-multi-resolution-spectrogram-1910.11480</loc><lastmod>2020-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-wavegan-a-fast-waveform-generation-model-based-on-generative-adversarial-networks-with-multi-resolution-spectrogram-1910.11480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-wavegan-a-fast-waveform-generation-model-based-on-generative-adversarial-networks-with-multi-resolution-spectrogram-1910.11480"/></url>
<url><loc>https://scifaro.com/en/abs/structural-sparsification-for-far-field-speaker-recognition-with-gna-1910.11488</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structural-sparsification-for-far-field-speaker-recognition-with-gna-1910.11488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structural-sparsification-for-far-field-speaker-recognition-with-gna-1910.11488"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-phase-gammatone-filterbank-for-speech-separation-via-tasnet-1910.11615</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-phase-gammatone-filterbank-for-speech-separation-via-tasnet-1910.11615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-phase-gammatone-filterbank-for-speech-separation-via-tasnet-1910.11615"/></url>
<url><loc>https://scifaro.com/en/abs/overlap-aware-diarization-resegmentation-using-neural-end-to-end-overlapped-speech-detection-1910.11646</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overlap-aware-diarization-resegmentation-using-neural-end-to-end-overlapped-speech-detection-1910.11646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overlap-aware-diarization-resegmentation-using-neural-end-to-end-overlapped-speech-detection-1910.11646"/></url>
<url><loc>https://scifaro.com/en/abs/spice-self-supervised-pitch-estimation-1910.11664</loc><lastmod>2020-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spice-self-supervised-pitch-estimation-1910.11664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spice-self-supervised-pitch-estimation-1910.11664"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-high-quality-singing-voice-synthesis-system-based-on-convolutional-neural-networks-1910.11690</loc><lastmod>2020-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-high-quality-singing-voice-synthesis-system-based-on-convolutional-neural-networks-1910.11690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-high-quality-singing-voice-synthesis-system-based-on-convolutional-neural-networks-1910.11690"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-blind-audio-source-extraction-supervised-by-dominant-speaker-identification-using-x-vectors-1910.11824</loc><lastmod>2019-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-blind-audio-source-extraction-supervised-by-dominant-speaker-identification-using-x-vectors-1910.11824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-blind-audio-source-extraction-supervised-by-dominant-speaker-identification-using-x-vectors-1910.11824"/></url>
<url><loc>https://scifaro.com/en/abs/towards-online-end-to-end-transformer-automatic-speech-recognition-1910.11871</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-online-end-to-end-transformer-automatic-speech-recognition-1910.11871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-online-end-to-end-transformer-automatic-speech-recognition-1910.11871"/></url>
<url><loc>https://scifaro.com/en/abs/feature-enhancement-with-deep-feature-losses-for-speaker-verification-1910.11905</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-enhancement-with-deep-feature-losses-for-speaker-verification-1910.11905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-enhancement-with-deep-feature-losses-for-speaker-verification-1910.11905"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-domain-adaptation-for-speaker-recognition-using-cycle-gans-1910.11909</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-domain-adaptation-for-speaker-recognition-using-cycle-gans-1910.11909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-domain-adaptation-for-speaker-recognition-using-cycle-gans-1910.11909"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-representations-via-phase-prediction-1910.11910</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-representations-via-phase-prediction-1910.11910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-representations-via-phase-prediction-1910.11910"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-feature-enhancement-for-speaker-verification-1910.11915</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-feature-enhancement-for-speaker-verification-1910.11915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-feature-enhancement-for-speaker-verification-1910.11915"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-estimation-for-black-box-automatic-speech-recognition-systems-using-lattice-recurrent-neural-networks-1910.11933</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-estimation-for-black-box-automatic-speech-recognition-systems-using-lattice-recurrent-neural-networks-1910.11933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-estimation-for-black-box-automatic-speech-recognition-systems-using-lattice-recurrent-neural-networks-1910.11933"/></url>
<url><loc>https://scifaro.com/en/abs/sum-product-networks-for-robust-automatic-speaker-identification-1910.11969</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sum-product-networks-for-robust-automatic-speaker-identification-1910.11969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sum-product-networks-for-robust-automatic-speaker-identification-1910.11969"/></url>
<url><loc>https://scifaro.com/en/abs/image-to-image-translation-based-on-convolutional-neural-network-approach-for-speech-declipping-1910.12116</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/image-to-image-translation-based-on-convolutional-neural-network-approach-for-speech-declipping-1910.12116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/image-to-image-translation-based-on-convolutional-neural-network-approach-for-speech-declipping-1910.12116"/></url>
<url><loc>https://scifaro.com/en/abs/transferring-neural-speech-waveform-synthesizers-to-musical-instrument-sounds-generation-1910.12381</loc><lastmod>2019-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferring-neural-speech-waveform-synthesizers-to-musical-instrument-sounds-generation-1910.12381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferring-neural-speech-waveform-synthesizers-to-musical-instrument-sounds-generation-1910.12381"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-choice-of-probability-distribution-randomness-and-search-methods-for-alignment-modeling-in-sequence-to-sequence-text-to-speech-synthesis-using-hard-alignment-1910.12383</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-choice-of-probability-distribution-randomness-and-search-methods-for-alignment-modeling-in-sequence-to-sequence-text-to-speech-synthesis-using-hard-alignment-1910.12383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-choice-of-probability-distribution-randomness-and-search-methods-for-alignment-modeling-in-sequence-to-sequence-text-to-speech-synthesis-using-hard-alignment-1910.12383"/></url>
<url><loc>https://scifaro.com/en/abs/a-bin-encoding-training-of-a-spiking-neural-network-based-voice-activity-detection-1910.12459</loc><lastmod>2021-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bin-encoding-training-of-a-spiking-neural-network-based-voice-activity-detection-1910.12459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bin-encoding-training-of-a-spiking-neural-network-based-voice-activity-detection-1910.12459"/></url>
<url><loc>https://scifaro.com/en/abs/label-efficient-audio-classification-through-multitask-learning-and-self-supervision-1910.12587</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-efficient-audio-classification-through-multitask-learning-and-self-supervision-1910.12587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-efficient-audio-classification-through-multitask-learning-and-self-supervision-1910.12587"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-multiple-speech-disfluencies-using-a-deep-residual-network-with-bidirectional-long-short-term-memory-1910.12590</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-multiple-speech-disfluencies-using-a-deep-residual-network-with-bidirectional-long-short-term-memory-1910.12590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-multiple-speech-disfluencies-using-a-deep-residual-network-with-bidirectional-long-short-term-memory-1910.12590"/></url>
<url><loc>https://scifaro.com/en/abs/but-system-description-to-voxceleb-speaker-recognition-challenge-2019-1910.12592</loc><lastmod>2019-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-system-description-to-voxceleb-speaker-recognition-challenge-2019-1910.12592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-system-description-to-voxceleb-speaker-recognition-challenge-2019-1910.12592"/></url>
<url><loc>https://scifaro.com/en/abs/generative-pre-training-for-speech-with-autoregressive-predictive-coding-1910.12607</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-pre-training-for-speech-with-autoregressive-predictive-coding-1910.12607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-pre-training-for-speech-with-autoregressive-predictive-coding-1910.12607"/></url>
<url><loc>https://scifaro.com/en/abs/g2g-tts-driven-pronunciation-learning-for-graphemic-hybrid-asr-1910.12612</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/g2g-tts-driven-pronunciation-learning-for-graphemic-hybrid-asr-1910.12612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/g2g-tts-driven-pronunciation-learning-for-graphemic-hybrid-asr-1910.12612"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-voice-conversion-of-spectral-envelopes-using-adversarial-weights-1910.12614</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-voice-conversion-of-spectral-envelopes-using-adversarial-weights-1910.12614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-voice-conversion-of-spectral-envelopes-using-adversarial-weights-1910.12614"/></url>
<url><loc>https://scifaro.com/en/abs/aegan-time-frequency-speech-denoising-via-generative-adversarial-networks-1910.12620</loc><lastmod>2020-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aegan-time-frequency-speech-denoising-via-generative-adversarial-networks-1910.12620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aegan-time-frequency-speech-denoising-via-generative-adversarial-networks-1910.12620"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-separation-and-transcription-of-mixtures-with-multiple-polyphonic-and-percussive-instruments-1910.12621</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-separation-and-transcription-of-mixtures-with-multiple-polyphonic-and-percussive-instruments-1910.12621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-separation-and-transcription-of-mixtures-with-multiple-polyphonic-and-percussive-instruments-1910.12621"/></url>
<url><loc>https://scifaro.com/en/abs/model-selection-for-deep-audio-source-separation-via-clustering-analysis-1910.12626</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-selection-for-deep-audio-source-separation-via-clustering-analysis-1910.12626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-selection-for-deep-audio-source-separation-via-clustering-analysis-1910.12626"/></url>
<url><loc>https://scifaro.com/en/abs/mockingjay-unsupervised-speech-representation-learning-with-deep-bidirectional-transformer-encoders-1910.12638</loc><lastmod>2020-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mockingjay-unsupervised-speech-representation-learning-with-deep-bidirectional-transformer-encoders-1910.12638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mockingjay-unsupervised-speech-representation-learning-with-deep-bidirectional-transformer-encoders-1910.12638"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-transducer-end-to-end-speech-recognition-with-self-attention-1910.12977</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-transducer-end-to-end-speech-recognition-with-self-attention-1910.12977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-transducer-end-to-end-speech-recognition-with-self-attention-1910.12977"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-speaker-verification-systems-with-deep-multi-speaker-text-to-speech-synthesis-1910.13054</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-speaker-verification-systems-with-deep-multi-speaker-text-to-speech-synthesis-1910.13054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-speaker-verification-systems-with-deep-multi-speaker-text-to-speech-synthesis-1910.13054"/></url>
<url><loc>https://scifaro.com/en/abs/mixup-breakdown-a-consistency-training-method-for-improving-generalization-of-speech-separation-models-1910.13253</loc><lastmod>2020-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixup-breakdown-a-consistency-training-method-for-improving-generalization-of-speech-separation-models-1910.13253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixup-breakdown-a-consistency-training-method-for-improving-generalization-of-speech-separation-models-1910.13253"/></url>
<url><loc>https://scifaro.com/en/abs/dr-vot-measuring-positive-and-negative-voice-onset-time-in-the-wild-1910.13255</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dr-vot-measuring-positive-and-negative-voice-onset-time-in-the-wild-1910.13255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dr-vot-measuring-positive-and-negative-voice-onset-time-in-the-wild-1910.13255"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-cross-lingual-voice-cloning-approach-with-a-few-text-free-samples-1910.13276</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-cross-lingual-voice-cloning-approach-with-a-few-text-free-samples-1910.13276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-cross-lingual-voice-cloning-approach-with-a-few-text-free-samples-1910.13276"/></url>
<url><loc>https://scifaro.com/en/abs/dfsmn-san-with-persistent-memory-model-for-automatic-speech-recognition-1910.13282</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dfsmn-san-with-persistent-memory-model-for-automatic-speech-recognition-1910.13282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dfsmn-san-with-persistent-memory-model-for-automatic-speech-recognition-1910.13282"/></url>
<url><loc>https://scifaro.com/en/abs/improving-sequence-to-sequence-speech-recognition-training-with-on-the-fly-data-augmentation-1910.13296</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-sequence-to-sequence-speech-recognition-training-with-on-the-fly-data-augmentation-1910.13296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-sequence-to-sequence-speech-recognition-training-with-on-the-fly-data-augmentation-1910.13296"/></url>
<url><loc>https://scifaro.com/en/abs/replay-spoofing-countermeasure-using-autoencoder-and-siamese-network-on-asvspoof-2019-challenge-1910.13345</loc><lastmod>2019-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/replay-spoofing-countermeasure-using-autoencoder-and-siamese-network-on-asvspoof-2019-challenge-1910.13345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/replay-spoofing-countermeasure-using-autoencoder-and-siamese-network-on-asvspoof-2019-challenge-1910.13345"/></url>
<url><loc>https://scifaro.com/en/abs/does-speech-enhancement-of-publicly-available-data-help-build-robust-speech-recognition-systems-1910.13488</loc><lastmod>2019-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-speech-enhancement-of-publicly-available-data-help-build-robust-speech-recognition-systems-1910.13488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-speech-enhancement-of-publicly-available-data-help-build-robust-speech-recognition-systems-1910.13488"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-fuzzy-logic-based-metric-for-audio-quality-assessment-objective-audio-quality-assessment-1910.13571</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-fuzzy-logic-based-metric-for-audio-quality-assessment-objective-audio-quality-assessment-1910.13571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-fuzzy-logic-based-metric-for-audio-quality-assessment-objective-audio-quality-assessment-1910.13571"/></url>
<url><loc>https://scifaro.com/en/abs/metric-learning-with-background-noise-class-for-few-shot-detection-of-rare-sound-events-1910.13724</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-learning-with-background-noise-class-for-few-shot-detection-of-rare-sound-events-1910.13724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-learning-with-background-noise-class-for-few-shot-detection-of-rare-sound-events-1910.13724"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-learning-for-classroom-activity-detection-1910.13799</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-learning-for-classroom-activity-detection-1910.13799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-learning-for-classroom-activity-detection-1910.13799"/></url>
<url><loc>https://scifaro.com/en/abs/indian-emospeech-command-dataset-a-dataset-for-emotion-based-speech-recognition-in-the-wild-1910.13801</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/indian-emospeech-command-dataset-a-dataset-for-emotion-based-speech-recognition-in-the-wild-1910.13801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/indian-emospeech-command-dataset-a-dataset-for-emotion-based-speech-recognition-in-the-wild-1910.13801"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-representation-learning-with-future-observation-prediction-for-speech-emotion-recognition-1910.13806</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-representation-learning-with-future-observation-prediction-for-speech-emotion-recognition-1910.13806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-representation-learning-with-future-observation-prediction-for-speech-emotion-recognition-1910.13806"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adversarial-learning-for-emotion-recognition-1910.13807</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adversarial-learning-for-emotion-recognition-1910.13807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adversarial-learning-for-emotion-recognition-1910.13807"/></url>
<url><loc>https://scifaro.com/en/abs/overlapped-speech-recognition-from-a-jointly-learned-multi-channel-neural-speech-extraction-and-representation-1910.13825</loc><lastmod>2019-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overlapped-speech-recognition-from-a-jointly-learned-multi-channel-neural-speech-extraction-and-representation-1910.13825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overlapped-speech-recognition-from-a-jointly-learned-multi-channel-neural-speech-extraction-and-representation-1910.13825"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-microphone-permutation-and-number-invariant-multi-channel-speech-separation-1910.14104</loc><lastmod>2020-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-microphone-permutation-and-number-invariant-multi-channel-speech-separation-1910.14104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-microphone-permutation-and-number-invariant-multi-channel-speech-separation-1910.14104"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-estimating-articulatory-movements-from-phoneme-sequences-and-acoustic-features-1910.14375</loc><lastmod>2020-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-estimating-articulatory-movements-from-phoneme-sequences-and-acoustic-features-1910.14375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-estimating-articulatory-movements-from-phoneme-sequences-and-acoustic-features-1910.14375"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-rakugo-speech-and-its-limitations-toward-speech-synthesis-that-entertains-audiences-1911.00137</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-rakugo-speech-and-its-limitations-toward-speech-synthesis-that-entertains-audiences-1911.00137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-rakugo-speech-and-its-limitations-toward-speech-synthesis-that-entertains-audiences-1911.00137"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-networks-for-emotion-recognition-combining-audio-and-transcripts-1911.00432</loc><lastmod>2019-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-networks-for-emotion-recognition-combining-audio-and-transcripts-1911.00432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-networks-for-emotion-recognition-combining-audio-and-transcripts-1911.00432"/></url>
<url><loc>https://scifaro.com/en/abs/memory-requirement-reduction-of-deep-neural-networks-using-low-bit-quantization-of-parameters-1911.00527</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-requirement-reduction-of-deep-neural-networks-using-low-bit-quantization-of-parameters-1911.00527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-requirement-reduction-of-deep-neural-networks-using-low-bit-quantization-of-parameters-1911.00527"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-word-error-rate-for-reverberant-speech-1911.00566</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-word-error-rate-for-reverberant-speech-1911.00566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-word-error-rate-for-reverberant-speech-1911.00566"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speaker-recognition-using-unsupervised-adversarial-invariance-1911.00940</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speaker-recognition-using-unsupervised-adversarial-invariance-1911.00940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speaker-recognition-using-unsupervised-adversarial-invariance-1911.00940"/></url>
<url><loc>https://scifaro.com/en/abs/onssen-an-open-source-speech-separation-and-enhancement-library-1911.00982</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/onssen-an-open-source-speech-separation-and-enhancement-library-1911.00982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/onssen-an-open-source-speech-separation-and-enhancement-library-1911.00982"/></url>
<url><loc>https://scifaro.com/en/abs/voice-biometrics-security-extrapolating-false-alarm-rate-via-hierarchical-bayesian-modeling-of-speaker-verification-scores-1911.01182</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-biometrics-security-extrapolating-false-alarm-rate-via-hierarchical-bayesian-modeling-of-speaker-verification-scores-1911.01182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-biometrics-security-extrapolating-false-alarm-rate-via-hierarchical-bayesian-modeling-of-speaker-verification-scores-1911.01182"/></url>
<url><loc>https://scifaro.com/en/abs/pyannote-audio-neural-building-blocks-for-speaker-diarization-1911.01255</loc><lastmod>2019-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pyannote-audio-neural-building-blocks-for-speaker-diarization-1911.01255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pyannote-audio-neural-building-blocks-for-speaker-diarization-1911.01255"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-online-diarization-with-sample-mean-loss-for-multi-domain-data-1911.01266</loc><lastmod>2019-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-online-diarization-with-sample-mean-loss-for-multi-domain-data-1911.01266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-online-diarization-with-sample-mean-loss-for-multi-domain-data-1911.01266"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-invariant-affective-representation-learning-via-adversarial-training-1911.01533</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-invariant-affective-representation-learning-via-adversarial-training-1911.01533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-invariant-affective-representation-learning-via-adversarial-training-1911.01533"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2019-a-large-scale-public-database-of-synthesized-converted-and-replayed-speech-1911.01601</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2019-a-large-scale-public-database-of-synthesized-converted-and-replayed-speech-1911.01601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2019-a-large-scale-public-database-of-synthesized-converted-and-replayed-speech-1911.01601"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-speech-synthesis-with-rich-and-granularized-control-1911.01635</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-speech-synthesis-with-rich-and-granularized-control-1911.01635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-speech-synthesis-with-rich-and-granularized-control-1911.01635"/></url>
<url><loc>https://scifaro.com/en/abs/cn-celeb-a-challenging-chinese-speaker-recognition-dataset-1911.01799</loc><lastmod>2019-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cn-celeb-a-challenging-chinese-speaker-recognition-dataset-1911.01799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cn-celeb-a-challenging-chinese-speaker-recognition-dataset-1911.01799"/></url>
<url><loc>https://scifaro.com/en/abs/fast-acoustic-scattering-using-convolutional-neural-networks-1911.01802</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-acoustic-scattering-using-convolutional-neural-networks-1911.01802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-acoustic-scattering-using-convolutional-neural-networks-1911.01802"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-factorized-auto-encoder-for-unsupervised-hierarchical-deep-factorization-of-speech-signal-1911.01806</loc><lastmod>2019-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-factorized-auto-encoder-for-unsupervised-hierarchical-deep-factorization-of-speech-signal-1911.01806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-factorized-auto-encoder-for-unsupervised-hierarchical-deep-factorization-of-speech-signal-1911.01806"/></url>
<url><loc>https://scifaro.com/en/abs/who-is-real-bob-adversarial-attacks-on-speaker-recognition-systems-1911.01840</loc><lastmod>2020-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-is-real-bob-adversarial-attacks-on-speaker-recognition-systems-1911.01840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-is-real-bob-adversarial-attacks-on-speaker-recognition-systems-1911.01840"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-via-deep-spectrum-image-translation-network-1911.01902</loc><lastmod>2019-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-via-deep-spectrum-image-translation-network-1911.01902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-via-deep-spectrum-image-translation-network-1911.01902"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-keyword-spotting-on-raw-audio-data-with-sinc-convolutions-1911.02086</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-on-raw-audio-data-with-sinc-convolutions-1911.02086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-on-raw-audio-data-with-sinc-convolutions-1911.02086"/></url>
<url><loc>https://scifaro.com/en/abs/closing-the-training-inference-gap-for-deep-attractor-networks-1911.02091</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/closing-the-training-inference-gap-for-deep-attractor-networks-1911.02091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/closing-the-training-inference-gap-for-deep-attractor-networks-1911.02091"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-attention-for-far-field-speech-recognition-with-deep-beamforming-neural-networks-1911.02115</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-attention-for-far-field-speech-recognition-with-deep-beamforming-neural-networks-1911.02115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-attention-for-far-field-speech-recognition-with-deep-beamforming-neural-networks-1911.02115"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-ambiguity-of-emotion-labels-through-meta-learning-1911.02216</loc><lastmod>2019-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-ambiguity-of-emotion-labels-through-meta-learning-1911.02216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-ambiguity-of-emotion-labels-through-meta-learning-1911.02216"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-end-to-end-models-for-long-form-speech-recognition-1911.02242</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-end-to-end-models-for-long-form-speech-recognition-1911.02242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-end-to-end-models-for-long-form-speech-recognition-1911.02242"/></url>
<url><loc>https://scifaro.com/en/abs/the-speed-submission-to-dihard-ii-contributions-lessons-learned-1911.02388</loc><lastmod>2019-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-speed-submission-to-dihard-ii-contributions-lessons-learned-1911.02388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-speed-submission-to-dihard-ii-contributions-lessons-learned-1911.02388"/></url>
<url><loc>https://scifaro.com/en/abs/mask-dependent-phase-estimation-for-monaural-speaker-separation-1911.02746</loc><lastmod>2020-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-dependent-phase-estimation-for-monaural-speaker-separation-1911.02746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-dependent-phase-estimation-for-monaural-speaker-separation-1911.02746"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-on-gmm-i-vector-based-speaker-verification-systems-1911.03078</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-on-gmm-i-vector-based-speaker-verification-systems-1911.03078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-on-gmm-i-vector-based-speaker-verification-systems-1911.03078"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-and-noise-reduction-for-both-diffusive-noise-field-and-point-noise-source-in-binaural-hearing-aids-preliminary-version-1911.03750</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-and-noise-reduction-for-both-diffusive-noise-field-and-point-noise-source-in-binaural-hearing-aids-preliminary-version-1911.03750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-and-noise-reduction-for-both-diffusive-noise-field-and-point-noise-source-in-binaural-hearing-aids-preliminary-version-1911.03750"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-dynamically-varying-acoustic-scenes-from-egocentric-audio-recordings-in-workplace-setting-1911.03843</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-dynamically-varying-acoustic-scenes-from-egocentric-audio-recordings-in-workplace-setting-1911.03843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-dynamically-varying-acoustic-scenes-from-egocentric-audio-recordings-in-workplace-setting-1911.03843"/></url>
<url><loc>https://scifaro.com/en/abs/robust-unsupervised-audio-visual-speech-enhancement-using-a-mixture-of-variational-autoencoders-1911.03930</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-unsupervised-audio-visual-speech-enhancement-using-a-mixture-of-variational-autoencoders-1911.03930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-unsupervised-audio-visual-speech-enhancement-using-a-mixture-of-variational-autoencoders-1911.03930"/></url>
<url><loc>https://scifaro.com/en/abs/improved-large-margin-softmax-loss-for-speaker-diarisation-1911.03970</loc><lastmod>2020-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-large-margin-softmax-loss-for-speaker-diarisation-1911.03970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-large-margin-softmax-loss-for-speaker-diarisation-1911.03970"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-training-for-deep-speech-source-separation-with-kullback-leibler-divergence-based-probabilistic-loss-function-1911.04228</loc><lastmod>2019-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-training-for-deep-speech-source-separation-with-kullback-leibler-divergence-based-probabilistic-loss-function-1911.04228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-training-for-deep-speech-source-separation-with-kullback-leibler-divergence-based-probabilistic-loss-function-1911.04228"/></url>
<url><loc>https://scifaro.com/en/abs/segment-relevance-estimation-for-audio-analysis-and-weakly-labelled-classification-1911.04666</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segment-relevance-estimation-for-audio-analysis-and-weakly-labelled-classification-1911.04666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segment-relevance-estimation-for-audio-analysis-and-weakly-labelled-classification-1911.04666"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-speech-events-and-speaker-characteristics-through-photo-plethysmographic-signal-neural-processing-1911.04808</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-speech-events-and-speaker-characteristics-through-photo-plethysmographic-signal-neural-processing-1911.04808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-speech-events-and-speaker-characteristics-through-photo-plethysmographic-signal-neural-processing-1911.04808"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-approach-for-lexical-stress-detection-based-on-transformer-1911.04862</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-lexical-stress-detection-based-on-transformer-1911.04862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-approach-for-lexical-stress-detection-based-on-transformer-1911.04862"/></url>
<url><loc>https://scifaro.com/en/abs/recurrent-neural-network-transducer-for-audio-visual-speech-recognition-1911.04890</loc><lastmod>2019-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recurrent-neural-network-transducer-for-audio-visual-speech-recognition-1911.04890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recurrent-neural-network-transducer-for-audio-visual-speech-recognition-1911.04890"/></url>
<url><loc>https://scifaro.com/en/abs/listen-and-fill-in-the-missing-letters-non-autoregressive-transformer-for-speech-recognition-1911.04908</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-and-fill-in-the-missing-letters-non-autoregressive-transformer-for-speech-recognition-1911.04908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-and-fill-in-the-missing-letters-non-autoregressive-transformer-for-speech-recognition-1911.04908"/></url>
<url><loc>https://scifaro.com/en/abs/warriors-of-the-word-deciphering-lyrical-topics-in-music-and-their-connection-to-audio-feature-dimensions-based-on-a-corpus-of-over-100-000-metal-songs-1911.04952</loc><lastmod>2019-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/warriors-of-the-word-deciphering-lyrical-topics-in-music-and-their-connection-to-audio-feature-dimensions-based-on-a-corpus-of-over-100-000-metal-songs-1911.04952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/warriors-of-the-word-deciphering-lyrical-topics-in-music-and-their-connection-to-audio-feature-dimensions-based-on-a-corpus-of-over-100-000-metal-songs-1911.04952"/></url>
<url><loc>https://scifaro.com/en/abs/3-d-feature-and-acoustic-modeling-for-far-field-speech-recognition-1911.05504</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3-d-feature-and-acoustic-modeling-for-far-field-speech-recognition-1911.05504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3-d-feature-and-acoustic-modeling-for-far-field-speech-recognition-1911.05504"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-voice-post-processing-using-voice-decoder-guidance-indicators-1911.05560</loc><lastmod>2019-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-voice-post-processing-using-voice-decoder-guidance-indicators-1911.05560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-voice-post-processing-using-voice-decoder-guidance-indicators-1911.05560"/></url>
<url><loc>https://scifaro.com/en/abs/the-phonetic-bases-of-vocal-expressed-emotion-natural-versus-acted-1911.05733</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-phonetic-bases-of-vocal-expressed-emotion-natural-versus-acted-1911.05733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-phonetic-bases-of-vocal-expressed-emotion-natural-versus-acted-1911.05733"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-voice-conversion-using-multitask-learning-with-text-to-speech-1911.06149</loc><lastmod>2019-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-voice-conversion-using-multitask-learning-with-text-to-speech-1911.06149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-voice-conversion-using-multitask-learning-with-text-to-speech-1911.06149"/></url>
<url><loc>https://scifaro.com/en/abs/independent-and-automatic-evaluation-of-acoustic-to-articulatory-inversion-models-1911.06573</loc><lastmod>2019-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-and-automatic-evaluation-of-acoustic-to-articulatory-inversion-models-1911.06573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-and-automatic-evaluation-of-acoustic-to-articulatory-inversion-models-1911.06573"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-methods-in-speaker-recognition-a-review-1911.06615</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-methods-in-speaker-recognition-a-review-1911.06615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-methods-in-speaker-recognition-a-review-1911.06615"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-multi-scale-detection-of-acoustic-events-1911.06878</loc><lastmod>2019-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-multi-scale-detection-of-acoustic-events-1911.06878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-multi-scale-detection-of-acoustic-events-1911.06878"/></url>
<url><loc>https://scifaro.com/en/abs/linguistically-aided-speaker-diarization-using-speaker-role-information-1911.07994</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/linguistically-aided-speaker-diarization-using-speaker-role-information-1911.07994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/linguistically-aided-speaker-diarization-using-speaker-role-information-1911.07994"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-microphone-speech-enhancement-based-on-deep-learning-1911.08153</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-microphone-speech-enhancement-based-on-deep-learning-1911.08153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-microphone-speech-enhancement-based-on-deep-learning-1911.08153"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-end-to-end-query-by-example-spoken-term-detection-1911.08332</loc><lastmod>2019-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-end-to-end-query-by-example-spoken-term-detection-1911.08332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-end-to-end-query-by-example-spoken-term-detection-1911.08332"/></url>
<url><loc>https://scifaro.com/en/abs/generative-audio-synthesis-with-a-parametric-model-1911.08335</loc><lastmod>2019-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-audio-synthesis-with-a-parametric-model-1911.08335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-audio-synthesis-with-a-parametric-model-1911.08335"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-loss-function-for-neural-modelling-of-audio-systems-1911.08922</loc><lastmod>2019-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-loss-function-for-neural-modelling-of-audio-systems-1911.08922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-loss-function-for-neural-modelling-of-audio-systems-1911.08922"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-audio-classification-system-based-on-raw-waveforms-and-mix-training-strategy-1911.09349</loc><lastmod>2019-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-audio-classification-system-based-on-raw-waveforms-and-mix-training-strategy-1911.09349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-audio-classification-system-based-on-raw-waveforms-and-mix-training-strategy-1911.09349"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-multi-modal-bone-air-conducted-speech-enhancement-1911.09847</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-multi-modal-bone-air-conducted-speech-enhancement-1911.09847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-multi-modal-bone-air-conducted-speech-enhancement-1911.09847"/></url>
<url><loc>https://scifaro.com/en/abs/signal-adaptive-and-perceptually-optimized-sound-zones-with-variable-span-trade-off-filters-1911.10016</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-adaptive-and-perceptually-optimized-sound-zones-with-variable-span-trade-off-filters-1911.10016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-adaptive-and-perceptually-optimized-sound-zones-with-variable-span-trade-off-filters-1911.10016"/></url>
<url><loc>https://scifaro.com/en/abs/invertible-dnn-based-nonlinear-time-frequency-transform-for-speech-enhancement-1911.10764</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/invertible-dnn-based-nonlinear-time-frequency-transform-for-speech-enhancement-1911.10764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/invertible-dnn-based-nonlinear-time-frequency-transform-for-speech-enhancement-1911.10764"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-via-dilated-convolutional-recurrent-neural-networks-1911.10888</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-via-dilated-convolutional-recurrent-neural-networks-1911.10888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-via-dilated-convolutional-recurrent-neural-networks-1911.10888"/></url>
<url><loc>https://scifaro.com/en/abs/robust-estimation-of-hypernasality-in-dysarthria-with-acoustic-model-likelihood-features-1911.11360</loc><lastmod>2020-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-estimation-of-hypernasality-in-dysarthria-with-acoustic-model-likelihood-features-1911.11360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-estimation-of-hypernasality-in-dysarthria-with-acoustic-model-likelihood-features-1911.11360"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-step-system-for-sound-event-localization-and-detection-1911.11373</loc><lastmod>2019-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-step-system-for-sound-event-localization-and-detection-1911.11373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-step-system-for-sound-event-localization-and-detection-1911.11373"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-multi-speaker-text-to-speech-synthesis-for-voice-cloning-without-using-parallel-corpus-for-unseen-speakers-1911.11601</loc><lastmod>2019-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-multi-speaker-text-to-speech-synthesis-for-voice-cloning-without-using-parallel-corpus-for-unseen-speakers-1911.11601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-multi-speaker-text-to-speech-synthesis-for-voice-cloning-without-using-parallel-corpus-for-unseen-speakers-1911.11601"/></url>
<url><loc>https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-1911.11610</loc><lastmod>2019-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-1911.11610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-1911.11610"/></url>
<url><loc>https://scifaro.com/en/abs/neural-percussive-synthesis-parameterised-by-high-level-timbral-features-1911.11853</loc><lastmod>2020-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-percussive-synthesis-parameterised-by-high-level-timbral-features-1911.11853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-percussive-synthesis-parameterised-by-high-level-timbral-features-1911.11853"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-prediction-of-suicidal-risk-in-military-couples-using-multimodal-interaction-cues-from-couples-conversations-1911.11927</loc><lastmod>2019-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-prediction-of-suicidal-risk-in-military-couples-using-multimodal-interaction-cues-from-couples-conversations-1911.11927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-prediction-of-suicidal-risk-in-military-couples-using-multimodal-interaction-cues-from-couples-conversations-1911.11927"/></url>
<url><loc>https://scifaro.com/en/abs/performance-comparison-of-uca-and-ucca-based-real-time-sound-source-localization-systems-using-circular-harmonics-srp-method-1911.12616</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-comparison-of-uca-and-ucca-based-real-time-sound-source-localization-systems-using-circular-harmonics-srp-method-1911.12616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-comparison-of-uca-and-ucca-based-real-time-sound-source-localization-systems-using-circular-harmonics-srp-method-1911.12616"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-neural-mask-estimator-for-generalized-eigen-value-beamforming-based-asr-1911.12617</loc><lastmod>2019-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-neural-mask-estimator-for-generalized-eigen-value-beamforming-based-asr-1911.12617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-neural-mask-estimator-for-generalized-eigen-value-beamforming-based-asr-1911.12617"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-detection-in-the-wild-lessons-learned-from-jsalt-2019-1912.00938</loc><lastmod>2019-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-detection-in-the-wild-lessons-learned-from-jsalt-2019-1912.00938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-detection-in-the-wild-lessons-learned-from-jsalt-2019-1912.00938"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-speech-synthesis-using-super-resolution-mel-spectrogram-1912.01167</loc><lastmod>2019-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-speech-synthesis-using-super-resolution-mel-spectrogram-1912.01167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-speech-synthesis-using-super-resolution-mel-spectrogram-1912.01167"/></url>
<url><loc>https://scifaro.com/en/abs/deep-contextualized-acoustic-representations-for-semi-supervised-speech-recognition-1912.01679</loc><lastmod>2020-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-contextualized-acoustic-representations-for-semi-supervised-speech-recognition-1912.01679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-contextualized-acoustic-representations-for-semi-supervised-speech-recognition-1912.01679"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-knowledge-into-end-to-end-speech-recognition-from-external-text-only-data-1912.01777</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-knowledge-into-end-to-end-speech-recognition-from-external-text-only-data-1912.01777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-knowledge-into-end-to-end-speech-recognition-from-external-text-only-data-1912.01777"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-u-nets-with-various-intermediate-blocks-for-spectrogram-based-singing-voice-separation-1912.02591</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-u-nets-with-various-intermediate-blocks-for-spectrogram-based-singing-voice-separation-1912.02591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-u-nets-with-various-intermediate-blocks-for-spectrogram-based-singing-voice-separation-1912.02591"/></url>
<url><loc>https://scifaro.com/en/abs/predominant-musical-instrument-classification-based-on-spectral-features-1912.02606</loc><lastmod>2020-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predominant-musical-instrument-classification-based-on-spectral-features-1912.02606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predominant-musical-instrument-classification-based-on-spectral-features-1912.02606"/></url>
<url><loc>https://scifaro.com/en/abs/seef-aldr-a-speaker-embedding-enhancement-framework-via-adversarial-learning-based-disentangled-representation-1912.02608</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seef-aldr-a-speaker-embedding-enhancement-framework-via-adversarial-learning-based-disentangled-representation-1912.02608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seef-aldr-a-speaker-embedding-enhancement-framework-via-adversarial-learning-based-disentangled-representation-1912.02608"/></url>
<url><loc>https://scifaro.com/en/abs/bimodal-speech-emotion-recognition-using-pre-trained-language-models-1912.02610</loc><lastmod>2019-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bimodal-speech-emotion-recognition-using-pre-trained-language-models-1912.02610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bimodal-speech-emotion-recognition-using-pre-trained-language-models-1912.02610"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-conversion-with-disentangled-representations-of-singer-and-vocal-technique-using-variational-autoencoders-1912.02613</loc><lastmod>2020-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-conversion-with-disentangled-representations-of-singer-and-vocal-technique-using-variational-autoencoders-1912.02613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-conversion-with-disentangled-representations-of-singer-and-vocal-technique-using-variational-autoencoders-1912.02613"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-transformer-architectures-for-large-scale-classification-and-synchronization-of-weakly-labeled-audio-events-1912.02615</loc><lastmod>2019-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-transformer-architectures-for-large-scale-classification-and-synchronization-of-weakly-labeled-audio-events-1912.02615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-transformer-architectures-for-large-scale-classification-and-synchronization-of-weakly-labeled-audio-events-1912.02615"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-target-speaker-enhancement-on-multi-talker-environment-using-event-driven-cameras-1912.02671</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-target-speaker-enhancement-on-multi-talker-environment-using-event-driven-cameras-1912.02671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-target-speaker-enhancement-on-multi-talker-environment-using-event-driven-cameras-1912.02671"/></url>
<url><loc>https://scifaro.com/en/abs/synchronous-transformers-for-end-to-end-speech-recognition-1912.02958</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synchronous-transformers-for-end-to-end-speech-recognition-1912.02958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synchronous-transformers-for-end-to-end-speech-recognition-1912.02958"/></url>
<url><loc>https://scifaro.com/en/abs/audio-attention-discriminative-language-model-for-asr-rescoring-1912.03363</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-attention-discriminative-language-model-for-asr-rescoring-1912.03363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-attention-discriminative-language-model-for-asr-rescoring-1912.03363"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-purpose-and-large-scale-speech-corpus-in-persian-and-english-for-speaker-and-speech-recognition-the-deepmine-database-1912.03627</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-purpose-and-large-scale-speech-corpus-in-persian-and-english-for-speaker-and-speech-recognition-the-deepmine-database-1912.03627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-purpose-and-large-scale-speech-corpus-in-persian-and-english-for-speaker-and-speech-recognition-the-deepmine-database-1912.03627"/></url>
<url><loc>https://scifaro.com/en/abs/visualizing-deep-neural-networks-for-speech-recognition-with-learned-topographic-filter-maps-1912.04067</loc><lastmod>2019-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizing-deep-neural-networks-for-speech-recognition-with-learned-topographic-filter-maps-1912.04067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizing-deep-neural-networks-for-speech-recognition-with-learned-topographic-filter-maps-1912.04067"/></url>
<url><loc>https://scifaro.com/en/abs/cross-language-aphasia-detection-using-optimal-transport-domain-adaptation-1912.04370</loc><lastmod>2019-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-language-aphasia-detection-using-optimal-transport-domain-adaptation-1912.04370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-language-aphasia-detection-using-optimal-transport-domain-adaptation-1912.04370"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-for-measuring-reading-levels-in-india-at-scale-1912.04381</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-for-measuring-reading-levels-in-india-at-scale-1912.04381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-for-measuring-reading-levels-in-india-at-scale-1912.04381"/></url>
<url><loc>https://scifaro.com/en/abs/development-and-evaluation-of-video-recordings-for-the-olsa-matrix-sentence-test-1912.04700</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-and-evaluation-of-video-recordings-for-the-olsa-matrix-sentence-test-1912.04700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-and-evaluation-of-video-recordings-for-the-olsa-matrix-sentence-test-1912.04700"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-the-chaos-level-of-infants-environment-via-unsupervised-learning-1912.04844</loc><lastmod>2019-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-the-chaos-level-of-infants-environment-via-unsupervised-learning-1912.04844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-the-chaos-level-of-infants-environment-via-unsupervised-learning-1912.04844"/></url>
<url><loc>https://scifaro.com/en/abs/advances-in-online-audio-visual-meeting-transcription-1912.04979</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advances-in-online-audio-visual-meeting-transcription-1912.04979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advances-in-online-audio-visual-meeting-transcription-1912.04979"/></url>
<url><loc>https://scifaro.com/en/abs/cooperative-audio-source-separation-and-enhancement-using-distributed-microphone-arrays-and-wearable-devices-1912.05038</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cooperative-audio-source-separation-and-enhancement-using-distributed-microphone-arrays-and-wearable-devices-1912.05038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cooperative-audio-source-separation-and-enhancement-using-distributed-microphone-arrays-and-wearable-devices-1912.05038"/></url>
<url><loc>https://scifaro.com/en/abs/motion-tolerant-beamforming-with-deformable-microphone-arrays-1912.05043</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/motion-tolerant-beamforming-with-deformable-microphone-arrays-1912.05043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/motion-tolerant-beamforming-with-deformable-microphone-arrays-1912.05043"/></url>
<url><loc>https://scifaro.com/en/abs/audiogmenter-a-matlab-toolbox-for-audio-data-augmentation-1912.05472</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiogmenter-a-matlab-toolbox-for-audio-data-augmentation-1912.05472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiogmenter-a-matlab-toolbox-for-audio-data-augmentation-1912.05472"/></url>
<url><loc>https://scifaro.com/en/abs/specaugment-on-large-scale-datasets-1912.05533</loc><lastmod>2019-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specaugment-on-large-scale-datasets-1912.05533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specaugment-on-large-scale-datasets-1912.05533"/></url>
<url><loc>https://scifaro.com/en/abs/on-neural-phone-recognition-of-mixed-source-ecog-signals-1912.05869</loc><lastmod>2019-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-neural-phone-recognition-of-mixed-source-ecog-signals-1912.05869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-neural-phone-recognition-of-mixed-source-ecog-signals-1912.05869"/></url>
<url><loc>https://scifaro.com/en/abs/singing-synthesis-with-a-little-help-from-my-attention-1912.05881</loc><lastmod>2020-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-synthesis-with-a-little-help-from-my-attention-1912.05881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-synthesis-with-a-little-help-from-my-attention-1912.05881"/></url>
<url><loc>https://scifaro.com/en/abs/measuring-mother-infant-emotions-by-audio-sensing-1912.05920</loc><lastmod>2019-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/measuring-mother-infant-emotions-by-audio-sensing-1912.05920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/measuring-mother-infant-emotions-by-audio-sensing-1912.05920"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-end-to-end-speech-recognition-with-neural-architecture-search-1912.05946</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-end-to-end-speech-recognition-with-neural-architecture-search-1912.05946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-end-to-end-speech-recognition-with-neural-architecture-search-1912.05946"/></url>
<url><loc>https://scifaro.com/en/abs/short-duration-speaker-verification-sdsv-challenge-2021-the-challenge-evaluation-plan-1912.06311</loc><lastmod>2021-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/short-duration-speaker-verification-sdsv-challenge-2021-the-challenge-evaluation-plan-1912.06311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/short-duration-speaker-verification-sdsv-challenge-2021-the-challenge-evaluation-plan-1912.06311"/></url>
<url><loc>https://scifaro.com/en/abs/voice-transformer-network-sequence-to-sequence-voice-conversion-using-transformer-with-text-to-speech-pretraining-1912.06813</loc><lastmod>2019-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-transformer-network-sequence-to-sequence-voice-conversion-using-transformer-with-text-to-speech-pretraining-1912.06813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-transformer-network-sequence-to-sequence-voice-conversion-using-transformer-with-text-to-speech-pretraining-1912.06813"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-training-of-time-domain-audio-separation-and-recognition-1912.08462</loc><lastmod>2020-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-training-of-time-domain-audio-separation-and-recognition-1912.08462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-training-of-time-domain-audio-separation-and-recognition-1912.08462"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-tdnn-with-convolutional-front-end-for-dialect-identification-in-the-2019-multi-genre-broadcast-challenge-1912.09003</loc><lastmod>2019-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-tdnn-with-convolutional-front-end-for-dialect-identification-in-the-2019-multi-genre-broadcast-challenge-1912.09003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-tdnn-with-convolutional-front-end-for-dialect-identification-in-the-2019-multi-genre-broadcast-challenge-1912.09003"/></url>
<url><loc>https://scifaro.com/en/abs/personalization-of-end-to-end-speech-recognition-on-mobile-devices-for-named-entities-1912.09251</loc><lastmod>2019-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalization-of-end-to-end-speech-recognition-on-mobile-devices-for-named-entities-1912.09251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalization-of-end-to-end-speech-recognition-on-mobile-devices-for-named-entities-1912.09251"/></url>
<url><loc>https://scifaro.com/en/abs/calibration-and-reference-simulations-for-the-auditory-periphery-model-of-verhulst-et-al-2018-version-1-2-1912.10026</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calibration-and-reference-simulations-for-the-auditory-periphery-model-of-verhulst-et-al-2018-version-1-2-1912.10026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calibration-and-reference-simulations-for-the-auditory-periphery-model-of-verhulst-et-al-2018-version-1-2-1912.10026"/></url>
<url><loc>https://scifaro.com/en/abs/end-point-detection-with-state-transition-model-based-on-chunk-wise-classification-1912.10442</loc><lastmod>2019-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-point-detection-with-state-transition-model-based-on-chunk-wise-classification-1912.10442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-point-detection-with-state-transition-model-based-on-chunk-wise-classification-1912.10442"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-of-inference-networks-for-vae-based-audio-visual-speech-enhancement-1912.10647</loc><lastmod>2021-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-of-inference-networks-for-vae-based-audio-visual-speech-enhancement-1912.10647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-of-inference-networks-for-vae-based-audio-visual-speech-enhancement-1912.10647"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-training-of-a-large-vocabulary-end-to-end-speech-recognition-system-1912.11040</loc><lastmod>2019-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-training-of-a-large-vocabulary-end-to-end-speech-recognition-system-1912.11040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-training-of-a-large-vocabulary-end-to-end-speech-recognition-system-1912.11040"/></url>
<url><loc>https://scifaro.com/en/abs/power-law-nonlinearity-with-maximally-uniform-distribution-criterion-for-improved-neural-network-training-in-automatic-speech-recognition-1912.11041</loc><lastmod>2019-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/power-law-nonlinearity-with-maximally-uniform-distribution-criterion-for-improved-neural-network-training-in-automatic-speech-recognition-1912.11041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/power-law-nonlinearity-with-maximally-uniform-distribution-criterion-for-improved-neural-network-training-in-automatic-speech-recognition-1912.11041"/></url>
<url><loc>https://scifaro.com/en/abs/a-cycle-gan-approach-to-model-natural-perturbations-in-speech-for-asr-applications-1912.11151</loc><lastmod>2019-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cycle-gan-approach-to-model-natural-perturbations-in-speech-for-asr-applications-1912.11151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cycle-gan-approach-to-model-natural-perturbations-in-speech-for-asr-applications-1912.11151"/></url>
<url><loc>https://scifaro.com/en/abs/learning-transferable-features-for-speech-emotion-recognition-1912.11547</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-transferable-features-for-speech-emotion-recognition-1912.11547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-transferable-features-for-speech-emotion-recognition-1912.11547"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-direction-of-arrival-estimation-using-improved-estimation-consistency-method-1912.11781</loc><lastmod>2019-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-direction-of-arrival-estimation-using-improved-estimation-consistency-method-1912.11781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-direction-of-arrival-estimation-using-improved-estimation-consistency-method-1912.11781"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-asr-with-lightweight-and-dynamic-convolutions-1912.11793</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-asr-with-lightweight-and-dynamic-convolutions-1912.11793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-asr-with-lightweight-and-dynamic-convolutions-1912.11793"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-speech-enhancement-using-a-multi-branch-temporal-convolutional-network-1912.12023</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-speech-enhancement-using-a-multi-branch-temporal-convolutional-network-1912.12023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-speech-enhancement-using-a-multi-branch-temporal-convolutional-network-1912.12023"/></url>
<url><loc>https://scifaro.com/en/abs/improved-multi-stage-training-of-online-attention-based-encoder-decoder-models-1912.12384</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-multi-stage-training-of-online-attention-based-encoder-decoder-models-1912.12384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-multi-stage-training-of-online-attention-based-encoder-decoder-models-1912.12384"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-gated-scaling-adaptative-acoustic-model-for-ctc-based-speech-recognition-1912.13307</loc><lastmod>2020-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-gated-scaling-adaptative-acoustic-model-for-ctc-based-speech-recognition-1912.13307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-gated-scaling-adaptative-acoustic-model-for-ctc-based-speech-recognition-1912.13307"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-batch-normalization-for-lstm-based-acoustic-modeling-of-speech-recognition-2001.00129</loc><lastmod>2020-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-batch-normalization-for-lstm-based-acoustic-modeling-of-speech-recognition-2001.00129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-batch-normalization-for-lstm-based-acoustic-modeling-of-speech-recognition-2001.00129"/></url>
<url><loc>https://scifaro.com/en/abs/eeg-based-continuous-speech-recognition-using-transformers-2001.00501</loc><lastmod>2020-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg-based-continuous-speech-recognition-using-transformers-2001.00501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg-based-continuous-speech-recognition-using-transformers-2001.00501"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-on-device-streaming-speech-recognition-with-large-speech-corpus-2001.00577</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-on-device-streaming-speech-recognition-with-large-speech-corpus-2001.00577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-on-device-streaming-speech-recognition-with-large-speech-corpus-2001.00577"/></url>
<url><loc>https://scifaro.com/en/abs/a-pilot-study-on-mandarin-chinese-cued-speech-2001.00731</loc><lastmod>2020-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pilot-study-on-mandarin-chinese-cued-speech-2001.00731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pilot-study-on-mandarin-chinese-cued-speech-2001.00731"/></url>
<url><loc>https://scifaro.com/en/abs/re-synchronization-using-the-hand-preceding-model-for-multi-modal-fusion-in-automatic-continuous-cued-speech-recognition-2001.00854</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/re-synchronization-using-the-hand-preceding-model-for-multi-modal-fusion-in-automatic-continuous-cued-speech-recognition-2001.00854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/re-synchronization-using-the-hand-preceding-model-for-multi-modal-fusion-in-automatic-continuous-cued-speech-recognition-2001.00854"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-based-on-denoising-autoencoder-with-multi-branched-encoders-2001.01538</loc><lastmod>2020-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-based-on-denoising-autoencoder-with-multi-branched-encoders-2001.01538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-based-on-denoising-autoencoder-with-multi-branched-encoders-2001.01538"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-recognition-of-overlapped-speech-for-the-lrs2-dataset-2001.01656</loc><lastmod>2020-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-recognition-of-overlapped-speech-for-the-lrs2-dataset-2001.01656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-recognition-of-overlapped-speech-for-the-lrs2-dataset-2001.01656"/></url>
<url><loc>https://scifaro.com/en/abs/character-aware-attention-based-end-to-end-speech-recognition-2001.01795</loc><lastmod>2020-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/character-aware-attention-based-end-to-end-speech-recognition-2001.01795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/character-aware-attention-based-end-to-end-speech-recognition-2001.01795"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-via-teacher-student-learning-for-end-to-end-speech-recognition-2001.01798</loc><lastmod>2020-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-via-teacher-student-learning-for-end-to-end-speech-recognition-2001.01798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-via-teacher-student-learning-for-end-to-end-speech-recognition-2001.01798"/></url>
<url><loc>https://scifaro.com/en/abs/audio-inpainting-revisited-and-reweighted-2001.02480</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-inpainting-revisited-and-reweighted-2001.02480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-inpainting-revisited-and-reweighted-2001.02480"/></url>
<url><loc>https://scifaro.com/en/abs/improving-dysarthric-speech-intelligibility-using-cycle-consistent-adversarial-training-2001.04260</loc><lastmod>2020-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-dysarthric-speech-intelligibility-using-cycle-consistent-adversarial-training-2001.04260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-dysarthric-speech-intelligibility-using-cycle-consistent-adversarial-training-2001.04260"/></url>
<url><loc>https://scifaro.com/en/abs/visually-guided-self-supervised-learning-of-speech-representations-2001.04316</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-guided-self-supervised-learning-of-speech-representations-2001.04316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-guided-self-supervised-learning-of-speech-representations-2001.04316"/></url>
<url><loc>https://scifaro.com/en/abs/a-differentiable-perceptual-audio-metric-learned-from-just-noticeable-differences-2001.04460</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-differentiable-perceptual-audio-metric-learned-from-just-noticeable-differences-2001.04460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-differentiable-perceptual-audio-metric-learned-from-just-noticeable-differences-2001.04460"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-deep-neural-network-for-modeling-speaker-characteristics-at-different-temporal-scales-2001.04584</loc><lastmod>2020-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-deep-neural-network-for-modeling-speaker-characteristics-at-different-temporal-scales-2001.04584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-deep-neural-network-for-modeling-speaker-characteristics-at-different-temporal-scales-2001.04584"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-speaker-embedding-learning-for-text-independent-speaker-verification-2001.04585</loc><lastmod>2020-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-speaker-embedding-learning-for-text-independent-speaker-verification-2001.04585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-speaker-embedding-learning-for-text-independent-speaker-verification-2001.04585"/></url>
<url><loc>https://scifaro.com/en/abs/improved-robust-asr-for-social-robots-in-public-spaces-2001.04619</loc><lastmod>2020-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-robust-asr-for-social-robots-in-public-spaces-2001.04619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-robust-asr-for-social-robots-in-public-spaces-2001.04619"/></url>
<url><loc>https://scifaro.com/en/abs/two-channel-audio-zooming-system-for-smartphone-2001.04940</loc><lastmod>2020-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-channel-audio-zooming-system-for-smartphone-2001.04940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-channel-audio-zooming-system-for-smartphone-2001.04940"/></url>
<url><loc>https://scifaro.com/en/abs/a-memory-augmented-architecture-for-continuous-speaker-identification-in-meetings-2001.05118</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-memory-augmented-architecture-for-continuous-speaker-identification-in-meetings-2001.05118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-memory-augmented-architecture-for-continuous-speaker-identification-in-meetings-2001.05118"/></url>
<url><loc>https://scifaro.com/en/abs/pairwise-discriminative-neural-plda-for-speaker-verification-2001.07034</loc><lastmod>2020-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pairwise-discriminative-neural-plda-for-speaker-verification-2001.07034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pairwise-discriminative-neural-plda-for-speaker-verification-2001.07034"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-filter-learning-using-soft-self-attention-for-raw-waveform-speech-recognition-2001.07067</loc><lastmod>2020-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-filter-learning-using-soft-self-attention-for-raw-waveform-speech-recognition-2001.07067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-filter-learning-using-soft-self-attention-for-raw-waveform-speech-recognition-2001.07067"/></url>
<url><loc>https://scifaro.com/en/abs/single-headed-attention-based-sequence-to-sequence-model-for-state-of-the-art-results-on-switchboard-2001.07263</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-headed-attention-based-sequence-to-sequence-model-for-state-of-the-art-results-on-switchboard-2001.07263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-headed-attention-based-sequence-to-sequence-model-for-state-of-the-art-results-on-switchboard-2001.07263"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-representation-disentanglement-using-cross-domain-features-and-adversarial-learning-in-variational-autoencoder-based-voice-conversion-2001.07849</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-representation-disentanglement-using-cross-domain-features-and-adversarial-learning-in-variational-autoencoder-based-voice-conversion-2001.07849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-representation-disentanglement-using-cross-domain-features-and-adversarial-learning-in-variational-autoencoder-based-voice-conversion-2001.07849"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-online-ctc-attention-end-to-end-speech-recognition-architecture-2001.08290</loc><lastmod>2020-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-online-ctc-attention-end-to-end-speech-recognition-architecture-2001.08290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-online-ctc-attention-end-to-end-speech-recognition-architecture-2001.08290"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-discrimination-of-target-speech-extraction-with-time-domain-speakerbeam-2001.08378</loc><lastmod>2020-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-discrimination-of-target-speech-extraction-with-time-domain-speakerbeam-2001.08378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-discrimination-of-target-speech-extraction-with-time-domain-speakerbeam-2001.08378"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-human-evaluation-of-audio-adversarial-examples-2001.08444</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-human-evaluation-of-audio-adversarial-examples-2001.08444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-human-evaluation-of-audio-adversarial-examples-2001.08444"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-gradient-approximation-for-memory-efficient-on-device-training-of-deep-neural-network-2001.08885</loc><lastmod>2020-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-gradient-approximation-for-memory-efficient-on-device-training-of-deep-neural-network-2001.08885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-gradient-approximation-for-memory-efficient-on-device-training-of-deep-neural-network-2001.08885"/></url>
<url><loc>https://scifaro.com/en/abs/performance-of-a-deep-neural-network-at-detecting-north-atlantic-right-whale-upcalls-2001.09127</loc><lastmod>2020-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-of-a-deep-neural-network-at-detecting-north-atlantic-right-whale-upcalls-2001.09127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-of-a-deep-neural-network-at-detecting-north-atlantic-right-whale-upcalls-2001.09127"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-asr-by-end-to-end-self-training-2001.09128</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-asr-by-end-to-end-self-training-2001.09128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-asr-by-end-to-end-self-training-2001.09128"/></url>
<url><loc>https://scifaro.com/en/abs/data-techniques-for-online-end-to-end-speech-recognition-2001.09221</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-techniques-for-online-end-to-end-speech-recognition-2001.09221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-techniques-for-online-end-to-end-speech-recognition-2001.09221"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-self-supervised-learning-for-robust-speech-recognition-2001.09239</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-self-supervised-learning-for-robust-speech-recognition-2001.09239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-self-supervised-learning-for-robust-speech-recognition-2001.09239"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-for-voice-trigger-detection-2001.09519</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-for-voice-trigger-detection-2001.09519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-for-voice-trigger-detection-2001.09519"/></url>
<url><loc>https://scifaro.com/en/abs/noise-dependent-super-gaussian-coherence-based-dual-microphone-speech-enhancement-for-hearing-aid-application-using-smartphone-2001.09571</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-dependent-super-gaussian-coherence-based-dual-microphone-speech-enhancement-for-hearing-aid-application-using-smartphone-2001.09571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-dependent-super-gaussian-coherence-based-dual-microphone-speech-enhancement-for-hearing-aid-application-using-smartphone-2001.09571"/></url>
<url><loc>https://scifaro.com/en/abs/audio-codec-enhancement-with-generative-adversarial-networks-2001.09653</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-codec-enhancement-with-generative-adversarial-networks-2001.09653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-codec-enhancement-with-generative-adversarial-networks-2001.09653"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-a-recurrent-two-stage-net-work-2001.09772</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-a-recurrent-two-stage-net-work-2001.09772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-with-a-recurrent-two-stage-net-work-2001.09772"/></url>
<url><loc>https://scifaro.com/en/abs/source-coding-of-audio-signals-with-a-generative-model-2001.09847</loc><lastmod>2020-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-coding-of-audio-signals-with-a-generative-model-2001.09847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-coding-of-audio-signals-with-a-generative-model-2001.09847"/></url>
<url><loc>https://scifaro.com/en/abs/frame-based-overlapping-speech-detection-using-convolutional-neural-networks-2001.09937</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-based-overlapping-speech-detection-using-convolutional-neural-networks-2001.09937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-based-overlapping-speech-detection-using-convolutional-neural-networks-2001.09937"/></url>
<url><loc>https://scifaro.com/en/abs/omap-l138-lcdk-development-kit-2001.10094</loc><lastmod>2020-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omap-l138-lcdk-development-kit-2001.10094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omap-l138-lcdk-development-kit-2001.10094"/></url>
<url><loc>https://scifaro.com/en/abs/clcnet-deep-learning-based-noise-reduction-for-hearing-aids-using-complex-linear-coding-2001.10218</loc><lastmod>2020-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clcnet-deep-learning-based-noise-reduction-for-hearing-aids-using-complex-linear-coding-2001.10218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clcnet-deep-learning-based-noise-reduction-for-hearing-aids-using-complex-linear-coding-2001.10218"/></url>
<url><loc>https://scifaro.com/en/abs/submodular-rank-aggregation-on-score-based-permutations-for-distributed-automatic-speech-recognition-2001.10529</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/submodular-rank-aggregation-on-score-based-permutations-for-distributed-automatic-speech-recognition-2001.10529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/submodular-rank-aggregation-on-score-based-permutations-for-distributed-automatic-speech-recognition-2001.10529"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-speech-distortion-losses-for-neural-network-based-real-time-speech-enhancement-2001.10601</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-speech-distortion-losses-for-neural-network-based-real-time-speech-enhancement-2001.10601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-speech-distortion-losses-for-neural-network-based-real-time-speech-enhancement-2001.10601"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-pre-training-of-bidirectional-speech-encoders-via-masked-reconstruction-2001.10603</loc><lastmod>2020-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-pre-training-of-bidirectional-speech-encoders-via-masked-reconstruction-2001.10603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-pre-training-of-bidirectional-speech-encoders-via-masked-reconstruction-2001.10603"/></url>
<url><loc>https://scifaro.com/en/abs/environment-aware-reconfigurable-noise-suppression-2001.10718</loc><lastmod>2020-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environment-aware-reconfigurable-noise-suppression-2001.10718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environment-aware-reconfigurable-noise-suppression-2001.10718"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-for-speaker-verification-and-voice-trigger-detection-2001.10816</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-for-speaker-verification-and-voice-trigger-detection-2001.10816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-for-speaker-verification-and-voice-trigger-detection-2001.10816"/></url>
<url><loc>https://scifaro.com/en/abs/mcsae-masked-cross-self-attentive-encoding-for-speaker-embedding-2001.10817</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mcsae-masked-cross-self-attentive-encoding-for-speaker-embedding-2001.10817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mcsae-masked-cross-self-attentive-encoding-for-speaker-embedding-2001.10817"/></url>
<url><loc>https://scifaro.com/en/abs/lattice-based-improvements-for-voice-triggering-using-graph-neural-networks-2001.10822</loc><lastmod>2020-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lattice-based-improvements-for-voice-triggering-using-graph-neural-networks-2001.10822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lattice-based-improvements-for-voice-triggering-using-graph-neural-networks-2001.10822"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-decision-fusion-for-wfst-based-and-seq2seq-models-2001.10832</loc><lastmod>2020-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-decision-fusion-for-wfst-based-and-seq2seq-models-2001.10832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-decision-fusion-for-wfst-based-and-seq2seq-models-2001.10832"/></url>
<url><loc>https://scifaro.com/en/abs/compact-recurrent-neural-networks-for-acoustic-event-detection-on-low-energy-low-complexity-platforms-2001.10876</loc><lastmod>2020-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compact-recurrent-neural-networks-for-acoustic-event-detection-on-low-energy-low-complexity-platforms-2001.10876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compact-recurrent-neural-networks-for-acoustic-event-detection-on-low-energy-low-complexity-platforms-2001.10876"/></url>
<url><loc>https://scifaro.com/en/abs/improving-language-identification-for-multilingual-speakers-2001.11019</loc><lastmod>2020-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-language-identification-for-multilingual-speakers-2001.11019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-language-identification-for-multilingual-speakers-2001.11019"/></url>
<url><loc>https://scifaro.com/en/abs/conditioning-autoencoder-latent-spaces-for-real-time-timbre-interpolation-and-synthesis-2001.11296</loc><lastmod>2020-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioning-autoencoder-latent-spaces-for-real-time-timbre-interpolation-and-synthesis-2001.11296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioning-autoencoder-latent-spaces-for-real-time-timbre-interpolation-and-synthesis-2001.11296"/></url>
<url><loc>https://scifaro.com/en/abs/but-opensat-2019-speech-recognition-system-2001.11360</loc><lastmod>2020-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-opensat-2019-speech-recognition-system-2001.11360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-opensat-2019-speech-recognition-system-2001.11360"/></url>
<url><loc>https://scifaro.com/en/abs/improving-lpcnet-based-text-to-speech-with-linear-prediction-structured-mixture-density-network-2001.11686</loc><lastmod>2020-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-lpcnet-based-text-to-speech-with-linear-prediction-structured-mixture-density-network-2001.11686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-lpcnet-based-text-to-speech-with-linear-prediction-structured-mixture-density-network-2001.11686"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-the-role-of-subsidiary-information-in-replay-attack-spoofing-detection-2001.11688</loc><lastmod>2020-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-the-role-of-subsidiary-information-in-replay-attack-spoofing-detection-2001.11688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-the-role-of-subsidiary-information-in-replay-attack-spoofing-detection-2001.11688"/></url>
<url><loc>https://scifaro.com/en/abs/graph-cepstrum-spatial-feature-extracted-from-partially-connected-microphones-2001.11894</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-cepstrum-spatial-feature-extracted-from-partially-connected-microphones-2001.11894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-cepstrum-spatial-feature-extracted-from-partially-connected-microphones-2001.11894"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-deep-feature-loss-based-enhancement-for-speaker-verification-2002.00139</loc><lastmod>2020-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-deep-feature-loss-based-enhancement-for-speaker-verification-2002.00139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-deep-feature-loss-based-enhancement-for-speaker-verification-2002.00139"/></url>
<url><loc>https://scifaro.com/en/abs/transforming-spectrum-and-prosody-for-emotional-voice-conversion-with-non-parallel-training-data-2002.00198</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transforming-spectrum-and-prosody-for-emotional-voice-conversion-with-non-parallel-training-data-2002.00198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transforming-spectrum-and-prosody-for-emotional-voice-conversion-with-non-parallel-training-data-2002.00198"/></url>
<url><loc>https://scifaro.com/en/abs/wavetts-tacotron-based-tts-with-joint-time-frequency-domain-loss-2002.00417</loc><lastmod>2020-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavetts-tacotron-based-tts-with-joint-time-frequency-domain-loss-2002.00417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavetts-tacotron-based-tts-with-joint-time-frequency-domain-loss-2002.00417"/></url>
<url><loc>https://scifaro.com/en/abs/tensor-to-vector-regression-for-multi-channel-speech-enhancement-based-on-tensor-train-network-2002.00544</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tensor-to-vector-regression-for-multi-channel-speech-enhancement-based-on-tensor-train-network-2002.00544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tensor-to-vector-regression-for-multi-channel-speech-enhancement-based-on-tensor-train-network-2002.00544"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-automatic-speech-recognition-integrated-with-ctc-based-voice-activity-detection-2002.00551</loc><lastmod>2020-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-automatic-speech-recognition-integrated-with-ctc-based-voice-activity-detection-2002.00551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-automatic-speech-recognition-integrated-with-ctc-based-voice-activity-detection-2002.00551"/></url>
<url><loc>https://scifaro.com/en/abs/time-difference-of-arrival-estimation-from-frequency-sliding-generalized-cross-correlations-using-convolutional-neural-networks-2002.00641</loc><lastmod>2020-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-difference-of-arrival-estimation-from-frequency-sliding-generalized-cross-correlations-using-convolutional-neural-networks-2002.00641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-difference-of-arrival-estimation-from-frequency-sliding-generalized-cross-correlations-using-convolutional-neural-networks-2002.00641"/></url>
<url><loc>https://scifaro.com/en/abs/within-sample-variability-invariant-loss-for-robust-speaker-recognition-under-noisy-environments-2002.00924</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/within-sample-variability-invariant-loss-for-robust-speaker-recognition-under-noisy-environments-2002.00924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/within-sample-variability-invariant-loss-for-robust-speaker-recognition-under-noisy-environments-2002.00924"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-anomaly-detection-via-latent-regularized-gaussian-mixture-generative-adversarial-networks-2002.01107</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-via-latent-regularized-gaussian-mixture-generative-adversarial-networks-2002.01107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-via-latent-regularized-gaussian-mixture-generative-adversarial-networks-2002.01107"/></url>
<url><loc>https://scifaro.com/en/abs/training-keyword-spotters-with-limited-and-synthesized-speech-data-2002.01322</loc><lastmod>2020-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-keyword-spotters-with-limited-and-synthesized-speech-data-2002.01322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-keyword-spotters-with-limited-and-synthesized-speech-data-2002.01322"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-emotion-primitives-from-speech-and-their-use-in-discerning-categorical-emotions-2002.01323</loc><lastmod>2020-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-emotion-primitives-from-speech-and-their-use-in-discerning-categorical-emotions-2002.01323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-emotion-primitives-from-speech-and-their-use-in-discerning-categorical-emotions-2002.01323"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-calibration-with-polynomial-regression-for-2-d-projection-using-svd-phat-2002.01440</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-calibration-with-polynomial-regression-for-2-d-projection-using-svd-phat-2002.01440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-calibration-with-polynomial-regression-for-2-d-projection-using-svd-phat-2002.01440"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-and-spectral-deep-attention-fusion-for-multi-channel-speech-separation-using-deep-embedding-features-2002.01626</loc><lastmod>2020-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-and-spectral-deep-attention-fusion-for-multi-channel-speech-separation-using-deep-embedding-features-2002.01626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-and-spectral-deep-attention-fusion-for-multi-channel-speech-separation-using-deep-embedding-features-2002.01626"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-of-head-motion-from-speech-waveforms-with-a-canonical-correlation-constrained-autoencoder-2002.01869</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-of-head-motion-from-speech-waveforms-with-a-canonical-correlation-constrained-autoencoder-2002.01869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-of-head-motion-from-speech-waveforms-with-a-canonical-correlation-constrained-autoencoder-2002.01869"/></url>
<url><loc>https://scifaro.com/en/abs/boffin-tts-few-shot-speaker-adaptation-by-bayesian-optimization-2002.01953</loc><lastmod>2020-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boffin-tts-few-shot-speaker-adaptation-by-bayesian-optimization-2002.01953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boffin-tts-few-shot-speaker-adaptation-by-bayesian-optimization-2002.01953"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-transducer-a-streamable-speech-recognition-model-with-transformer-encoders-and-rnn-t-loss-2002.02562</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-transducer-a-streamable-speech-recognition-model-with-transformer-encoders-and-rnn-t-loss-2002.02562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-transducer-a-streamable-speech-recognition-model-with-transformer-encoders-and-rnn-t-loss-2002.02562"/></url>
<url><loc>https://scifaro.com/en/abs/leap-system-for-sre19-cts-challenge-improvements-and-error-analysis-2002.02735</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leap-system-for-sre19-cts-challenge-improvements-and-error-analysis-2002.02735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leap-system-for-sre19-cts-challenge-improvements-and-error-analysis-2002.02735"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-pretraining-transfers-well-across-languages-2002.02848</loc><lastmod>2020-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-pretraining-transfers-well-across-languages-2002.02848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-pretraining-transfers-well-across-languages-2002.02848"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-analysis-of-information-encoded-in-disentangled-neural-speaker-representations-2002.03520</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-analysis-of-information-encoded-in-disentangled-neural-speaker-representations-2002.03520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-analysis-of-information-encoded-in-disentangled-neural-speaker-representations-2002.03520"/></url>
<url><loc>https://scifaro.com/en/abs/nplda-a-deep-neural-plda-model-for-speaker-verification-2002.03562</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nplda-a-deep-neural-plda-model-for-speaker-verification-2002.03562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nplda-a-deep-neural-plda-model-for-speaker-verification-2002.03562"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-using-speaker-cues-2002.03566</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-using-speaker-cues-2002.03566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-using-speaker-cues-2002.03566"/></url>
<url><loc>https://scifaro.com/en/abs/fully-hierarchical-fine-grained-prosody-modeling-for-interpretable-speech-synthesis-2002.03785</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-hierarchical-fine-grained-prosody-modeling-for-interpretable-speech-synthesis-2002.03785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-hierarchical-fine-grained-prosody-modeling-for-interpretable-speech-synthesis-2002.03785"/></url>
<url><loc>https://scifaro.com/en/abs/generating-diverse-and-natural-text-to-speech-samples-using-a-quantized-fine-grained-vae-and-auto-regressive-prosody-prior-2002.03788</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-diverse-and-natural-text-to-speech-samples-using-a-quantized-fine-grained-vae-and-auto-regressive-prosody-prior-2002.03788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-diverse-and-natural-text-to-speech-samples-using-a-quantized-fine-grained-vae-and-auto-regressive-prosody-prior-2002.03788"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-investigation-on-optimizing-tandem-speaker-verification-and-countermeasure-systems-using-reinforcement-learning-2002.03801</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-investigation-on-optimizing-tandem-speaker-verification-and-countermeasure-systems-using-reinforcement-learning-2002.03801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-investigation-on-optimizing-tandem-speaker-verification-and-countermeasure-systems-using-reinforcement-learning-2002.03801"/></url>
<url><loc>https://scifaro.com/en/abs/a-speaker-verification-backend-for-improved-calibration-performance-across-varying-conditions-2002.03802</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speaker-verification-backend-for-improved-calibration-performance-across-varying-conditions-2002.03802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speaker-verification-backend-for-improved-calibration-performance-across-varying-conditions-2002.03802"/></url>
<url><loc>https://scifaro.com/en/abs/vocoder-free-end-to-end-voice-conversion-with-transformer-network-2002.03808</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocoder-free-end-to-end-voice-conversion-with-transformer-network-2002.03808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocoder-free-end-to-end-voice-conversion-with-transformer-network-2002.03808"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-silent-speech-recognition-using-eeg-2002.03851</loc><lastmod>2020-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-silent-speech-recognition-using-eeg-2002.03851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-silent-speech-recognition-using-eeg-2002.03851"/></url>
<url><loc>https://scifaro.com/en/abs/attentional-networks-for-music-generation-2002.03854</loc><lastmod>2020-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentional-networks-for-music-generation-2002.03854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentional-networks-for-music-generation-2002.03854"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-with-transformer-2002.03921</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-with-transformer-2002.03921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-speech-recognition-with-transformer-2002.03921"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-active-speaker-detection-and-virtual-cinematography-for-video-conferencing-2002.03977</loc><lastmod>2022-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-active-speaker-detection-and-virtual-cinematography-for-video-conferencing-2002.03977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-active-speaker-detection-and-virtual-cinematography-for-video-conferencing-2002.03977"/></url>
<url><loc>https://scifaro.com/en/abs/deep-feature-embedding-and-hierarchical-classification-for-audio-scene-classification-2002.04857</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-feature-embedding-and-hierarchical-classification-for-audio-scene-classification-2002.04857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-feature-embedding-and-hierarchical-classification-for-audio-scene-classification-2002.04857"/></url>
<url><loc>https://scifaro.com/en/abs/content-based-singing-voice-extraction-from-a-musical-mixture-2002.04933</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-based-singing-voice-extraction-from-a-musical-mixture-2002.04933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-based-singing-voice-extraction-from-a-musical-mixture-2002.04933"/></url>
<url><loc>https://scifaro.com/en/abs/fastwave-accelerating-autoregressive-convolutional-neural-networks-on-fpga-2002.04971</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastwave-accelerating-autoregressive-convolutional-neural-networks-on-fpga-2002.04971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastwave-accelerating-autoregressive-convolutional-neural-networks-on-fpga-2002.04971"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-boundary-detection-using-learnable-segmental-features-2002.04992</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-boundary-detection-using-learnable-segmental-features-2002.04992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-boundary-detection-using-learnable-segmental-features-2002.04992"/></url>
<url><loc>https://scifaro.com/en/abs/active-learning-for-sound-event-detection-2002.05033</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-learning-for-sound-event-detection-2002.05033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-learning-for-sound-event-detection-2002.05033"/></url>
<url><loc>https://scifaro.com/en/abs/x-vectors-meet-emotions-a-study-on-dependencies-between-emotion-and-speaker-recognition-2002.05039</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-vectors-meet-emotions-a-study-on-dependencies-between-emotion-and-speaker-recognition-2002.05039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-vectors-meet-emotions-a-study-on-dependencies-between-emotion-and-speaker-recognition-2002.05039"/></url>
<url><loc>https://scifaro.com/en/abs/attentional-speech-recognition-models-misbehave-on-out-of-domain-utterances-2002.05150</loc><lastmod>2020-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentional-speech-recognition-models-misbehave-on-out-of-domain-utterances-2002.05150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentional-speech-recognition-models-misbehave-on-out-of-domain-utterances-2002.05150"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-audio-visual-speaker-diarization-2002.05314</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-audio-visual-speaker-diarization-2002.05314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-audio-visual-speaker-diarization-2002.05314"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-user-models-based-on-gmm-ubm-and-i-vectors-for-speech-handwriting-and-gait-assessment-of-parkinson-s-disease-patients-2002.05412</loc><lastmod>2020-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-user-models-based-on-gmm-ubm-and-i-vectors-for-speech-handwriting-and-gait-assessment-of-parkinson-s-disease-patients-2002.05412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-user-models-based-on-gmm-ubm-and-i-vectors-for-speech-handwriting-and-gait-assessment-of-parkinson-s-disease-patients-2002.05412"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-and-scalable-neural-residual-waveform-coding-with-collaborative-quantization-2002.05604</loc><lastmod>2020-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-and-scalable-neural-residual-waveform-coding-with-collaborative-quantization-2002.05604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-and-scalable-neural-residual-waveform-coding-with-collaborative-quantization-2002.05604"/></url>
<url><loc>https://scifaro.com/en/abs/consistency-aware-multi-channel-speech-enhancement-using-deep-neural-networks-2002.05831</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistency-aware-multi-channel-speech-enhancement-using-deep-neural-networks-2002.05831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistency-aware-multi-channel-speech-enhancement-using-deep-neural-networks-2002.05831"/></url>
<url><loc>https://scifaro.com/en/abs/phase-reconstruction-based-on-recurrent-phase-unwrapping-with-deep-neural-networks-2002.05832</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-reconstruction-based-on-recurrent-phase-unwrapping-with-deep-neural-networks-2002.05832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-reconstruction-based-on-recurrent-phase-unwrapping-with-deep-neural-networks-2002.05832"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-enhancement-using-equilibriated-rnn-2002.05843</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-enhancement-using-equilibriated-rnn-2002.05843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-enhancement-using-equilibriated-rnn-2002.05843"/></url>
<url><loc>https://scifaro.com/en/abs/a-sequence-matching-network-for-polyphonic-sound-event-localization-and-detection-2002.05865</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sequence-matching-network-for-polyphonic-sound-event-localization-and-detection-2002.05865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sequence-matching-network-for-polyphonic-sound-event-localization-and-detection-2002.05865"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-self-adaptation-and-multi-head-self-attention-2002.05873</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-self-adaptation-and-multi-head-self-attention-2002.05873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-self-adaptation-and-multi-head-self-attention-2002.05873"/></url>
<url><loc>https://scifaro.com/en/abs/stable-training-of-dnn-for-speech-enhancement-based-on-perceptually-motivated-black-box-cost-function-2002.05879</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stable-training-of-dnn-for-speech-enhancement-based-on-perceptually-motivated-black-box-cost-function-2002.05879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stable-training-of-dnn-for-speech-enhancement-based-on-perceptually-motivated-black-box-cost-function-2002.05879"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-based-on-sound-intensity-vector-refined-by-dnn-based-denoising-and-source-separation-2002.05994</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-based-on-sound-intensity-vector-refined-by-dnn-based-denoising-and-source-separation-2002.05994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-based-on-sound-intensity-vector-refined-by-dnn-based-denoising-and-source-separation-2002.05994"/></url>
<url><loc>https://scifaro.com/en/abs/an-adaptive-x-vector-model-for-text-independent-speaker-verification-2002.06049</loc><lastmod>2025-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adaptive-x-vector-model-for-text-independent-speaker-verification-2002.06049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adaptive-x-vector-model-for-text-independent-speaker-verification-2002.06049"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speaker-adaptation-using-attention-based-speaker-memory-for-end-to-end-asr-2002.06165</loc><lastmod>2020-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speaker-adaptation-using-attention-based-speaker-memory-for-end-to-end-asr-2002.06165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speaker-adaptation-using-attention-based-speaker-memory-for-end-to-end-asr-2002.06165"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-with-region-proposal-network-2002.06220</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-with-region-proposal-network-2002.06220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-with-region-proposal-network-2002.06220"/></url>
<url><loc>https://scifaro.com/en/abs/boosted-locality-sensitive-hashing-discriminative-binary-codes-for-source-separation-2002.06239</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosted-locality-sensitive-hashing-discriminative-binary-codes-for-source-separation-2002.06239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosted-locality-sensitive-hashing-discriminative-binary-codes-for-source-separation-2002.06239"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-pooling-methods-on-lstm-models-for-rare-acoustic-event-classification-2002.06279</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-pooling-methods-on-lstm-models-for-rare-acoustic-event-classification-2002.06279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-pooling-methods-on-lstm-models-for-rare-acoustic-event-classification-2002.06279"/></url>
<url><loc>https://scifaro.com/en/abs/small-energy-masking-for-improved-neural-network-training-for-end-to-end-speech-recognition-2002.06312</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-energy-masking-for-improved-neural-network-training-for-end-to-end-speech-recognition-2002.06312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-energy-masking-for-improved-neural-network-training-for-end-to-end-speech-recognition-2002.06312"/></url>
<url><loc>https://scifaro.com/en/abs/speech-to-singing-conversion-in-an-encoder-decoder-framework-2002.06595</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-to-singing-conversion-in-an-encoder-decoder-framework-2002.06595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-to-singing-conversion-in-an-encoder-decoder-framework-2002.06595"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-binaural-speech-separation-with-preserved-spatial-cues-2002.06637</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-binaural-speech-separation-with-preserved-spatial-cues-2002.06637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-binaural-speech-separation-with-preserved-spatial-cues-2002.06637"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-bilinear-pooling-on-time-liked-and-frequency-liked-convolution-neural-network-2002.07065</loc><lastmod>2020-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-bilinear-pooling-on-time-liked-and-frequency-liked-convolution-neural-network-2002.07065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-bilinear-pooling-on-time-liked-and-frequency-liked-convolution-neural-network-2002.07065"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-with-capsule-networks-for-speech-to-intent-applications-2002.07450</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-with-capsule-networks-for-speech-to-intent-applications-2002.07450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-with-capsule-networks-for-speech-to-intent-applications-2002.07450"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-support-vector-machine-2002.07590</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-support-vector-machine-2002.07590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-support-vector-machine-2002.07590"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-siamese-neural-network-for-improving-replay-attack-detection-2002.07629</loc><lastmod>2020-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-siamese-neural-network-for-improving-replay-attack-detection-2002.07629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-siamese-neural-network-for-improving-replay-attack-detection-2002.07629"/></url>
<url><loc>https://scifaro.com/en/abs/workshop-report-detection-and-classification-in-marine-bioacoustics-with-deep-learning-2002.08249</loc><lastmod>2020-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/workshop-report-detection-and-classification-in-marine-bioacoustics-with-deep-learning-2002.08249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/workshop-report-detection-and-classification-in-marine-bioacoustics-with-deep-learning-2002.08249"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-of-conv-tasnet-2002.08688</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-of-conv-tasnet-2002.08688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-of-conv-tasnet-2002.08688"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-speech-embeddings-using-cross-modal-self-supervision-2002.08742</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-speech-embeddings-using-cross-modal-self-supervision-2002.08742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-speech-embeddings-using-cross-modal-self-supervision-2002.08742"/></url>
<url><loc>https://scifaro.com/en/abs/isegan-improved-speech-enhancement-generative-adversarial-networks-2002.08796</loc><lastmod>2020-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/isegan-improved-speech-enhancement-generative-adversarial-networks-2002.08796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/isegan-improved-speech-enhancement-generative-adversarial-networks-2002.08796"/></url>
<url><loc>https://scifaro.com/en/abs/imputer-sequence-modelling-via-imputation-and-dynamic-programming-2002.08926</loc><lastmod>2020-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imputer-sequence-modelling-via-imputation-and-dynamic-programming-2002.08926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imputer-sequence-modelling-via-imputation-and-dynamic-programming-2002.08926"/></url>
<url><loc>https://scifaro.com/en/abs/wavesplit-end-to-end-speech-separation-by-speaker-clustering-2002.08933</loc><lastmod>2020-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavesplit-end-to-end-speech-separation-by-speaker-clustering-2002.08933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavesplit-end-to-end-speech-separation-by-speaker-clustering-2002.08933"/></url>
<url><loc>https://scifaro.com/en/abs/multi-label-sound-event-retrieval-using-a-deep-learning-based-siamese-structure-with-a-pairwise-presence-matrix-2002.09026</loc><lastmod>2020-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-label-sound-event-retrieval-using-a-deep-learning-based-siamese-structure-with-a-pairwise-presence-matrix-2002.09026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-label-sound-event-retrieval-using-a-deep-learning-based-siamese-structure-with-a-pairwise-presence-matrix-2002.09026"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-trainable-front-ends-for-neural-speech-enhancement-2002.09286</loc><lastmod>2020-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-trainable-front-ends-for-neural-speech-enhancement-2002.09286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-trainable-front-ends-for-neural-speech-enhancement-2002.09286"/></url>
<url><loc>https://scifaro.com/en/abs/multi-branch-learning-for-weakly-labeled-sound-event-detection-2002.09661</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-branch-learning-for-weakly-labeled-sound-event-detection-2002.09661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-branch-learning-for-weakly-labeled-sound-event-detection-2002.09661"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-view-cnn-based-acoustic-classification-system-for-automatic-animal-species-identification-2002.09821</loc><lastmod>2020-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-view-cnn-based-acoustic-classification-system-for-automatic-animal-species-identification-2002.09821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-view-cnn-based-acoustic-classification-system-for-automatic-animal-species-identification-2002.09821"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-sequence-to-sequence-neural-tts-with-lpcnet-backend-for-real-time-speech-synthesis-on-cpu-2002.10708</loc><lastmod>2020-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-sequence-to-sequence-neural-tts-with-lpcnet-backend-for-real-time-speech-synthesis-on-cpu-2002.10708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-sequence-to-sequence-neural-tts-with-lpcnet-backend-for-real-time-speech-synthesis-on-cpu-2002.10708"/></url>
<url><loc>https://scifaro.com/en/abs/an-lstm-based-architecture-to-relate-speech-stimulus-to-eeg-2002.10988</loc><lastmod>2020-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-lstm-based-architecture-to-relate-speech-stimulus-to-eeg-2002.10988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-lstm-based-architecture-to-relate-speech-stimulus-to-eeg-2002.10988"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-online-separation-of-the-sound-source-of-interest-through-blstm-based-binary-masking-2002.11241</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-online-separation-of-the-sound-source-of-interest-through-blstm-based-binary-masking-2002.11241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-online-separation-of-the-sound-source-of-interest-through-blstm-based-binary-masking-2002.11241"/></url>
<url><loc>https://scifaro.com/en/abs/dataset-of-raw-and-pre-processed-speech-signals-mel-frequency-cepstral-coefficients-of-speech-and-heart-rate-measurements-2002.11250</loc><lastmod>2020-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dataset-of-raw-and-pre-processed-speech-signals-mel-frequency-cepstral-coefficients-of-speech-and-heart-rate-measurements-2002.11250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dataset-of-raw-and-pre-processed-speech-signals-mel-frequency-cepstral-coefficients-of-speech-and-heart-rate-measurements-2002.11250"/></url>
<url><loc>https://scifaro.com/en/abs/a-density-ratio-approach-to-language-model-fusion-in-end-to-end-automatic-speech-recognition-2002.11268</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-density-ratio-approach-to-language-model-fusion-in-end-to-end-automatic-speech-recognition-2002.11268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-density-ratio-approach-to-language-model-fusion-in-end-to-end-automatic-speech-recognition-2002.11268"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-and-multistage-fusion-for-dimensional-audiovisual-emotion-recognition-2002.11312</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-and-multistage-fusion-for-dimensional-audiovisual-emotion-recognition-2002.11312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-and-multistage-fusion-for-dimensional-audiovisual-emotion-recognition-2002.11312"/></url>
<url><loc>https://scifaro.com/en/abs/but-system-for-the-second-dihard-speech-diarization-challenge-2002.11356</loc><lastmod>2020-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/but-system-for-the-second-dihard-speech-diarization-challenge-2002.11356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/but-system-for-the-second-dihard-speech-diarization-challenge-2002.11356"/></url>
<url><loc>https://scifaro.com/en/abs/skinaugment-auto-encoding-speaker-conversions-for-automatic-speech-translation-2002.12231</loc><lastmod>2020-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skinaugment-auto-encoding-speaker-conversions-for-automatic-speech-translation-2002.12231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skinaugment-auto-encoding-speaker-conversions-for-automatic-speech-translation-2002.12231"/></url>
<url><loc>https://scifaro.com/en/abs/auxiliary-function-based-algorithm-for-blind-extraction-of-a-moving-speaker-2002.12619</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auxiliary-function-based-algorithm-for-blind-extraction-of-a-moving-speaker-2002.12619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auxiliary-function-based-algorithm-for-blind-extraction-of-a-moving-speaker-2002.12619"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-using-eeg-2002.12756</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-using-eeg-2002.12756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-using-eeg-2002.12756"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-decision-tree-for-depression-recognition-in-speech-2002.12759</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-decision-tree-for-depression-recognition-in-speech-2002.12759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-decision-tree-for-depression-recognition-in-speech-2002.12759"/></url>
<url><loc>https://scifaro.com/en/abs/dihard-ii-is-still-hard-experimental-results-and-discussions-from-the-dku-lenovo-team-2002.12761</loc><lastmod>2020-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dihard-ii-is-still-hard-experimental-results-and-discussions-from-the-dku-lenovo-team-2002.12761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dihard-ii-is-still-hard-experimental-results-and-discussions-from-the-dku-lenovo-team-2002.12761"/></url>
<url><loc>https://scifaro.com/en/abs/towards-learning-a-universal-non-semantic-representation-of-speech-2002.12764</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-learning-a-universal-non-semantic-representation-of-speech-2002.12764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-learning-a-universal-non-semantic-representation-of-speech-2002.12764"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-continuous-valence-and-arousal-prediction-in-the-wild-using-deep-3d-features-and-sequence-modeling-2002.12766</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-continuous-valence-and-arousal-prediction-in-the-wild-using-deep-3d-features-and-sequence-modeling-2002.12766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-continuous-valence-and-arousal-prediction-in-the-wild-using-deep-3d-features-and-sequence-modeling-2002.12766"/></url>
<url><loc>https://scifaro.com/en/abs/identification-of-dementia-using-audio-biomarkers-2002.12788</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-of-dementia-using-audio-biomarkers-2002.12788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-of-dementia-using-audio-biomarkers-2002.12788"/></url>
<url><loc>https://scifaro.com/en/abs/deep-residual-dense-lattice-network-for-speech-enhancement-2002.12794</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-residual-dense-lattice-network-for-speech-enhancement-2002.12794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-residual-dense-lattice-network-for-speech-enhancement-2002.12794"/></url>
<url><loc>https://scifaro.com/en/abs/a-i-based-embedded-speech-to-text-using-deepspeech-2002.12830</loc><lastmod>2020-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-i-based-embedded-speech-to-text-using-deepspeech-2002.12830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-i-based-embedded-speech-to-text-using-deepspeech-2002.12830"/></url>
<url><loc>https://scifaro.com/en/abs/generating-eeg-features-from-acoustic-features-2003.00007</loc><lastmod>2020-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-eeg-features-from-acoustic-features-2003.00007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-eeg-features-from-acoustic-features-2003.00007"/></url>
<url><loc>https://scifaro.com/en/abs/expression-recognition-in-the-wild-using-sequence-modeling-2003.00170</loc><lastmod>2020-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expression-recognition-in-the-wild-using-sequence-modeling-2003.00170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expression-recognition-in-the-wild-using-sequence-modeling-2003.00170"/></url>
<url><loc>https://scifaro.com/en/abs/pathological-speech-detection-using-x-vector-embeddings-2003.00864</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pathological-speech-detection-using-x-vector-embeddings-2003.00864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pathological-speech-detection-using-x-vector-embeddings-2003.00864"/></url>
<url><loc>https://scifaro.com/en/abs/inferring-the-location-of-reflecting-surfaces-exploiting-loudspeaker-directivity-2003.01117</loc><lastmod>2020-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inferring-the-location-of-reflecting-surfaces-exploiting-loudspeaker-directivity-2003.01117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inferring-the-location-of-reflecting-surfaces-exploiting-loudspeaker-directivity-2003.01117"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-singing-voice-separation-by-deep-neural-network-informed-doa-constrained-cnmf-2003.01162</loc><lastmod>2020-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-singing-voice-separation-by-deep-neural-network-informed-doa-constrained-cnmf-2003.01162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-singing-voice-separation-by-deep-neural-network-informed-doa-constrained-cnmf-2003.01162"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-of-glottal-pulse-positions-in-a-neural-analysis-synthesis-framework-2003.01220</loc><lastmod>2020-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-of-glottal-pulse-positions-in-a-neural-analysis-synthesis-framework-2003.01220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-of-glottal-pulse-positions-in-a-neural-analysis-synthesis-framework-2003.01220"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-silence-feature-in-dimensional-speech-emotion-recognition-2003.01277</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-silence-feature-in-dimensional-speech-emotion-recognition-2003.01277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-silence-feature-in-dimensional-speech-emotion-recognition-2003.01277"/></url>
<url><loc>https://scifaro.com/en/abs/amateur-drones-detection-a-machine-learning-approach-utilizing-the-acoustic-signals-in-the-presence-of-strong-interference-2003.01519</loc><lastmod>2020-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amateur-drones-detection-a-machine-learning-approach-utilizing-the-acoustic-signals-in-the-presence-of-strong-interference-2003.01519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amateur-drones-detection-a-machine-learning-approach-utilizing-the-acoustic-signals-in-the-presence-of-strong-interference-2003.01519"/></url>
<url><loc>https://scifaro.com/en/abs/voice-separation-with-an-unknown-number-of-multiple-speakers-2003.01531</loc><lastmod>2020-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-separation-with-an-unknown-number-of-multiple-speakers-2003.01531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-separation-with-an-unknown-number-of-multiple-speakers-2003.01531"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-interpretable-representation-learning-for-singing-voice-separation-2003.01567</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-interpretable-representation-learning-for-singing-voice-separation-2003.01567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-interpretable-representation-learning-for-singing-voice-separation-2003.01567"/></url>
<url><loc>https://scifaro.com/en/abs/seld-tcn-sound-event-localization-detection-via-temporal-convolutional-networks-2003.01609</loc><lastmod>2021-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seld-tcn-sound-event-localization-detection-via-temporal-convolutional-networks-2003.01609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seld-tcn-sound-event-localization-detection-via-temporal-convolutional-networks-2003.01609"/></url>
<url><loc>https://scifaro.com/en/abs/towards-real-time-mispronunciation-detection-in-kids-speech-2003.01765</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-real-time-mispronunciation-detection-in-kids-speech-2003.01765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-real-time-mispronunciation-detection-in-kids-speech-2003.01765"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-feedback-for-speech-enhancement-with-and-without-parallel-speech-data-2003.01769</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-feedback-for-speech-enhancement-with-and-without-parallel-speech-data-2003.01769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-feedback-for-speech-enhancement-with-and-without-parallel-speech-data-2003.01769"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-speech-dereverberation-2003.01861</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-speech-dereverberation-2003.01861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-complex-spectral-mapping-for-speech-dereverberation-2003.01861"/></url>
<url><loc>https://scifaro.com/en/abs/learning-fast-adaptation-on-cross-accented-speech-recognition-2003.01901</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-fast-adaptation-on-cross-accented-speech-recognition-2003.01901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-fast-adaptation-on-cross-accented-speech-recognition-2003.01901"/></url>
<url><loc>https://scifaro.com/en/abs/graphtts-graph-to-sequence-modelling-in-neural-text-to-speech-2003.01924</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graphtts-graph-to-sequence-modelling-in-neural-text-to-speech-2003.01924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graphtts-graph-to-sequence-modelling-in-neural-text-to-speech-2003.01924"/></url>
<url><loc>https://scifaro.com/en/abs/aligntts-efficient-feed-forward-text-to-speech-system-without-explicit-alignment-2003.01950</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aligntts-efficient-feed-forward-text-to-speech-system-without-explicit-alignment-2003.01950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aligntts-efficient-feed-forward-text-to-speech-system-without-explicit-alignment-2003.01950"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-speaker-clustering-method-based-on-discrete-tied-variational-autoencoder-2003.01955</loc><lastmod>2020-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-speaker-clustering-method-based-on-discrete-tied-variational-autoencoder-2003.01955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-speaker-clustering-method-based-on-discrete-tied-variational-autoencoder-2003.01955"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-universal-and-robust-adversarial-attacks-against-speaker-recognition-systems-2003.02301</loc><lastmod>2020-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-universal-and-robust-adversarial-attacks-against-speaker-recognition-systems-2003.02301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-universal-and-robust-adversarial-attacks-against-speaker-recognition-systems-2003.02301"/></url>
<url><loc>https://scifaro.com/en/abs/auto-tuning-spectral-clustering-for-speaker-diarization-using-normalized-maximum-eigengap-2003.02405</loc><lastmod>2020-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-tuning-spectral-clustering-for-speaker-diarization-using-normalized-maximum-eigengap-2003.02405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-tuning-spectral-clustering-for-speaker-diarization-using-normalized-maximum-eigengap-2003.02405"/></url>
<url><loc>https://scifaro.com/en/abs/overdetermined-independent-vector-analysis-2003.02458</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overdetermined-independent-vector-analysis-2003.02458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overdetermined-independent-vector-analysis-2003.02458"/></url>
<url><loc>https://scifaro.com/en/abs/guided-generative-adversarial-neural-network-for-representation-learning-and-high-fidelity-audio-generation-using-fewer-labelled-audio-data-2003.02836</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-generative-adversarial-neural-network-for-representation-learning-and-high-fidelity-audio-generation-using-fewer-labelled-audio-data-2003.02836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-generative-adversarial-neural-network-for-representation-learning-and-high-fidelity-audio-generation-using-fewer-labelled-audio-data-2003.02836"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-context-dependent-units-boundary-correction-for-corpus-based-unit-selection-text-to-speech-2003.02837</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-context-dependent-units-boundary-correction-for-corpus-based-unit-selection-text-to-speech-2003.02837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-context-dependent-units-boundary-correction-for-corpus-based-unit-selection-text-to-speech-2003.02837"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-neural-diarization-reformulating-speaker-diarization-as-simple-multi-label-classification-2003.02966</loc><lastmod>2020-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-neural-diarization-reformulating-speaker-diarization-as-simple-multi-label-classification-2003.02966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-neural-diarization-reformulating-speaker-diarization-as-simple-multi-label-classification-2003.02966"/></url>
<url><loc>https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-spoofing-countermeasures-of-asv-2003.03065</loc><lastmod>2020-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-spoofing-countermeasures-of-asv-2003.03065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-spoofing-countermeasures-of-asv-2003.03065"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-development-of-asr-systems-for-multilingual-code-switched-speech-in-under-resourced-languages-2003.03135</loc><lastmod>2020-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-development-of-asr-systems-for-multilingual-code-switched-speech-in-under-resourced-languages-2003.03135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-development-of-asr-systems-for-multilingual-code-switched-speech-in-under-resourced-languages-2003.03135"/></url>
<url><loc>https://scifaro.com/en/abs/multi-time-scale-convolution-for-emotion-recognition-from-speech-audio-signals-2003.03375</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-time-scale-convolution-for-emotion-recognition-from-speech-audio-signals-2003.03375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-time-scale-convolution-for-emotion-recognition-from-speech-audio-signals-2003.03375"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-speaker-verification-for-online-identification-of-new-speakers-with-short-segments-2003.03432</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-speaker-verification-for-online-identification-of-new-speakers-with-short-segments-2003.03432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-speaker-verification-for-online-identification-of-new-speakers-with-short-segments-2003.03432"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-end-to-end-multi-channel-speech-separation-via-spatial-feature-learning-2003.03927</loc><lastmod>2020-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-end-to-end-multi-channel-speech-separation-via-spatial-feature-learning-2003.03927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-end-to-end-multi-channel-speech-separation-via-spatial-feature-learning-2003.03927"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-real-noisy-reverberant-meetings-with-all-neural-source-separation-counting-and-diarization-system-2003.03987</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-real-noisy-reverberant-meetings-with-all-neural-source-separation-counting-and-diarization-system-2003.03987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-real-noisy-reverberant-meetings-with-all-neural-source-separation-counting-and-diarization-system-2003.03987"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noise-robust-automatic-speech-recognition-with-single-channel-time-domain-enhancement-network-2003.03998</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noise-robust-automatic-speech-recognition-with-single-channel-time-domain-enhancement-network-2003.03998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noise-robust-automatic-speech-recognition-with-single-channel-time-domain-enhancement-network-2003.03998"/></url>
<url><loc>https://scifaro.com/en/abs/toward-cross-domain-speech-recognition-with-end-to-end-models-2003.04194</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-cross-domain-speech-recognition-with-end-to-end-models-2003.04194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-cross-domain-speech-recognition-with-end-to-end-models-2003.04194"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speech-processing-a-survey-from-large-corpora-to-limited-data-2003.04241</loc><lastmod>2020-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speech-processing-a-survey-from-large-corpora-to-limited-data-2003.04241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-networks-for-automatic-speech-processing-a-survey-from-large-corpora-to-limited-data-2003.04241"/></url>
<url><loc>https://scifaro.com/en/abs/vowels-and-prosody-contribution-in-neural-network-based-voice-conversion-algorithm-with-noisy-training-data-2003.04640</loc><lastmod>2020-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vowels-and-prosody-contribution-in-neural-network-based-voice-conversion-algorithm-with-noisy-training-data-2003.04640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vowels-and-prosody-contribution-in-neural-network-based-voice-conversion-algorithm-with-noisy-training-data-2003.04640"/></url>
<url><loc>https://scifaro.com/en/abs/development-of-automatic-speech-recognition-for-kazakh-language-using-transfer-learning-2003.04710</loc><lastmod>2020-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-of-automatic-speech-recognition-for-kazakh-language-using-transfer-learning-2003.04710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-of-automatic-speech-recognition-for-kazakh-language-using-transfer-learning-2003.04710"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-using-eeg-2003.04733</loc><lastmod>2020-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-using-eeg-2003.04733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-using-eeg-2003.04733"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-using-coefficient-mapping-and-neural-network-2003.05184</loc><lastmod>2020-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-using-coefficient-mapping-and-neural-network-2003.05184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-using-coefficient-mapping-and-neural-network-2003.05184"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-watermarking-using-graph-based-transform-and-singular-value-decomposition-2003.05223</loc><lastmod>2020-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-watermarking-using-graph-based-transform-and-singular-value-decomposition-2003.05223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-watermarking-using-graph-based-transform-and-singular-value-decomposition-2003.05223"/></url>
<url><loc>https://scifaro.com/en/abs/bringing-in-the-outliers-a-sparse-subspace-clustering-approach-to-learn-a-dictionary-of-mouse-ultrasonic-vocalizations-2003.05897</loc><lastmod>2020-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bringing-in-the-outliers-a-sparse-subspace-clustering-approach-to-learn-a-dictionary-of-mouse-ultrasonic-vocalizations-2003.05897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bringing-in-the-outliers-a-sparse-subspace-clustering-approach-to-learn-a-dictionary-of-mouse-ultrasonic-vocalizations-2003.05897"/></url>
<url><loc>https://scifaro.com/en/abs/a-wide-dataset-of-ear-shapes-and-pinna-related-transfer-functions-generated-by-random-ear-drawings-2003.06182</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-wide-dataset-of-ear-shapes-and-pinna-related-transfer-functions-generated-by-random-ear-drawings-2003.06182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-wide-dataset-of-ear-shapes-and-pinna-related-transfer-functions-generated-by-random-ear-drawings-2003.06182"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-individualization-a-survey-2003.06183</loc><lastmod>2020-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-individualization-a-survey-2003.06183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-individualization-a-survey-2003.06183"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-musical-style-ranking-symbolic-music-based-on-similarity-to-a-style-2003.06226</loc><lastmod>2020-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-musical-style-ranking-symbolic-music-based-on-similarity-to-a-style-2003.06226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-musical-style-ranking-symbolic-music-based-on-similarity-to-a-style-2003.06226"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-style-and-content-separation-by-minimizing-mutual-information-for-speech-synthesis-2003.06227</loc><lastmod>2020-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-style-and-content-separation-by-minimizing-mutual-information-for-speech-synthesis-2003.06227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-style-and-content-separation-by-minimizing-mutual-information-for-speech-synthesis-2003.06227"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-spatial-aligment-requirements-of-central-and-peripheral-object-events-2003.06656</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-spatial-aligment-requirements-of-central-and-peripheral-object-events-2003.06656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-spatial-aligment-requirements-of-central-and-peripheral-object-events-2003.06656"/></url>
<url><loc>https://scifaro.com/en/abs/perception-of-prosodic-variation-for-speech-synthesis-using-an-unsupervised-discrete-representation-of-f0-2003.06686</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perception-of-prosodic-variation-for-speech-synthesis-using-an-unsupervised-discrete-representation-of-f0-2003.06686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perception-of-prosodic-variation-for-speech-synthesis-using-an-unsupervised-discrete-representation-of-f0-2003.06686"/></url>
<url><loc>https://scifaro.com/en/abs/a-proto-object-based-audiovisual-saliency-map-2003.06779</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-proto-object-based-audiovisual-saliency-map-2003.06779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-proto-object-based-audiovisual-saliency-map-2003.06779"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-gaussian-mixture-model-framework-for-speaker-adaptation-of-deep-neural-network-acoustic-models-2003.06894</loc><lastmod>2020-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-gaussian-mixture-model-framework-for-speaker-adaptation-of-deep-neural-network-acoustic-models-2003.06894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-gaussian-mixture-model-framework-for-speaker-adaptation-of-deep-neural-network-acoustic-models-2003.06894"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-multi-channel-target-speech-separation-2003.07032</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-multi-channel-target-speech-separation-2003.07032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-multi-channel-target-speech-separation-2003.07032"/></url>
<url><loc>https://scifaro.com/en/abs/tensorflow-audio-models-in-essentia-2003.07393</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tensorflow-audio-models-in-essentia-2003.07393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tensorflow-audio-models-in-essentia-2003.07393"/></url>
<url><loc>https://scifaro.com/en/abs/high-accuracy-and-low-latency-speech-recognition-with-two-head-contextual-layer-trajectory-lstm-model-2003.07482</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-accuracy-and-low-latency-speech-recognition-with-two-head-contextual-layer-trajectory-lstm-model-2003.07482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-accuracy-and-low-latency-speech-recognition-with-two-head-contextual-layer-trajectory-lstm-model-2003.07482"/></url>
<url><loc>https://scifaro.com/en/abs/deep-attention-fusion-feature-for-speech-separation-with-end-to-end-post-filter-method-2003.07544</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-attention-fusion-feature-for-speech-separation-with-end-to-end-post-filter-method-2003.07544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-attention-fusion-feature-for-speech-separation-with-end-to-end-post-filter-method-2003.07544"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-recurrent-denoising-autoencoder-embeddings-for-speaker-identification-2003.07688</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-recurrent-denoising-autoencoder-embeddings-for-speaker-identification-2003.07688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-recurrent-denoising-autoencoder-embeddings-for-speaker-identification-2003.07688"/></url>
<url><loc>https://scifaro.com/en/abs/asr-error-correction-and-domain-adaptation-using-machine-translation-2003.07692</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-error-correction-and-domain-adaptation-using-machine-translation-2003.07692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-error-correction-and-domain-adaptation-using-machine-translation-2003.07692"/></url>
<url><loc>https://scifaro.com/en/abs/audio-inpainting-with-generative-adversarial-network-2003.07704</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-inpainting-with-generative-adversarial-network-2003.07704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-inpainting-with-generative-adversarial-network-2003.07704"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-autoregressive-transducer-hat-2003.07705</loc><lastmod>2020-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-autoregressive-transducer-hat-2003.07705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-autoregressive-transducer-hat-2003.07705"/></url>
<url><loc>https://scifaro.com/en/abs/deliberation-model-based-two-pass-end-to-end-speech-recognition-2003.07962</loc><lastmod>2020-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deliberation-model-based-two-pass-end-to-end-speech-recognition-2003.07962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deliberation-model-based-two-pass-end-to-end-speech-recognition-2003.07962"/></url>
<url><loc>https://scifaro.com/en/abs/voice-and-accompaniment-separation-in-music-using-self-attention-convolutional-neural-network-2003.08954</loc><lastmod>2020-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-and-accompaniment-separation-in-music-using-self-attention-convolutional-neural-network-2003.08954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-and-accompaniment-separation-in-music-using-self-attention-convolutional-neural-network-2003.08954"/></url>
<url><loc>https://scifaro.com/en/abs/improving-embedding-extraction-for-speaker-verification-with-ladder-network-2003.09125</loc><lastmod>2020-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-embedding-extraction-for-speaker-verification-with-ladder-network-2003.09125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-embedding-extraction-for-speaker-verification-with-ladder-network-2003.09125"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-using-audio-tagging-2003.09164</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-audio-tagging-2003.09164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-using-audio-tagging-2003.09164"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-mismatch-between-text-script-and-voice-over-using-utterance-verification-based-on-phoneme-recognition-ranking-2003.09180</loc><lastmod>2020-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-mismatch-between-text-script-and-voice-over-using-utterance-verification-based-on-phoneme-recognition-ranking-2003.09180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-mismatch-between-text-script-and-voice-over-using-utterance-verification-based-on-phoneme-recognition-ranking-2003.09180"/></url>
<url><loc>https://scifaro.com/en/abs/deep-generative-variational-autoencoding-for-replay-spoof-detection-in-automatic-speaker-verification-2003.09542</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-generative-variational-autoencoding-for-replay-spoof-detection-in-automatic-speaker-verification-2003.09542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-generative-variational-autoencoding-for-replay-spoof-detection-in-automatic-speaker-verification-2003.09542"/></url>
<url><loc>https://scifaro.com/en/abs/audio-impairment-recognition-using-a-correlation-based-feature-representation-2003.09889</loc><lastmod>2021-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-impairment-recognition-using-a-correlation-based-feature-representation-2003.09889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-impairment-recognition-using-a-correlation-based-feature-representation-2003.09889"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-asr-for-simultaneous-speech-translation-2003.09891</loc><lastmod>2020-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-asr-for-simultaneous-speech-translation-2003.09891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-asr-for-simultaneous-speech-translation-2003.09891"/></url>
<url><loc>https://scifaro.com/en/abs/high-performance-sequence-to-sequence-model-for-streaming-speech-recognition-2003.10022</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-performance-sequence-to-sequence-model-for-streaming-speech-recognition-2003.10022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-performance-sequence-to-sequence-model-for-streaming-speech-recognition-2003.10022"/></url>
<url><loc>https://scifaro.com/en/abs/dialect-identification-of-spoken-north-s-ami-language-varieties-using-prosodic-features-2003.10183</loc><lastmod>2020-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialect-identification-of-spoken-north-s-ami-language-varieties-using-prosodic-features-2003.10183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialect-identification-of-spoken-north-s-ami-language-varieties-using-prosodic-features-2003.10183"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-end-to-end-streaming-speech-recognition-with-a-scout-network-2003.10369</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-end-to-end-streaming-speech-recognition-with-a-scout-network-2003.10369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-end-to-end-streaming-speech-recognition-with-a-scout-network-2003.10369"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-error-and-correlation-based-loss-functions-for-multitask-learning-dimensional-speech-emotion-recognition-2003.10724</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-error-and-correlation-based-loss-functions-for-multitask-learning-dimensional-speech-emotion-recognition-2003.10724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-error-and-correlation-based-loss-functions-for-multitask-learning-dimensional-speech-emotion-recognition-2003.10724"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-voice-conversion-system-with-wavenet-vocoder-and-collapsed-speech-suppression-2003.11750</loc><lastmod>2020-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-system-with-wavenet-vocoder-and-collapsed-speech-suppression-2003.11750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-system-with-wavenet-vocoder-and-collapsed-speech-suppression-2003.11750"/></url>
<url><loc>https://scifaro.com/en/abs/speech-quality-factors-for-traditional-and-neural-based-low-bit-rate-vocoders-2003.11882</loc><lastmod>2020-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-quality-factors-for-traditional-and-neural-based-low-bit-rate-vocoders-2003.11882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-quality-factors-for-traditional-and-neural-based-low-bit-rate-vocoders-2003.11882"/></url>
<url><loc>https://scifaro.com/en/abs/in-defence-of-metric-learning-for-speaker-recognition-2003.11982</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-defence-of-metric-learning-for-speaker-recognition-2003.11982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-defence-of-metric-learning-for-speaker-recognition-2003.11982"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-multi-objective-deep-learning-speech-denoising-methods-2003.12108</loc><lastmod>2020-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-multi-objective-deep-learning-speech-denoising-methods-2003.12108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-multi-objective-deep-learning-speech-denoising-methods-2003.12108"/></url>
<url><loc>https://scifaro.com/en/abs/dual-attention-in-time-and-frequency-domain-for-voice-activity-detection-2003.12266</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-attention-in-time-and-frequency-domain-for-voice-activity-detection-2003.12266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-attention-in-time-and-frequency-domain-for-voice-activity-detection-2003.12266"/></url>
<url><loc>https://scifaro.com/en/abs/separating-varying-numbers-of-sources-with-auxiliary-autoencoding-loss-2003.12326</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separating-varying-numbers-of-sources-with-auxiliary-autoencoding-loss-2003.12326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separating-varying-numbers-of-sources-with-auxiliary-autoencoding-loss-2003.12326"/></url>
<url><loc>https://scifaro.com/en/abs/can-you-hear-me-textit-now-sensitive-comparisons-of-human-and-machine-perception-2003.12362</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-you-hear-me-textit-now-sensitive-comparisons-of-human-and-machine-perception-2003.12362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-you-hear-me-textit-now-sensitive-comparisons-of-human-and-machine-perception-2003.12362"/></url>
<url><loc>https://scifaro.com/en/abs/training-for-speech-recognition-on-coprocessors-2003.12366</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-for-speech-recognition-on-coprocessors-2003.12366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-for-speech-recognition-on-coprocessors-2003.12366"/></url>
<url><loc>https://scifaro.com/en/abs/mic2mic-using-cycle-consistent-generative-adversarial-networks-to-overcome-microphone-variability-in-speech-systems-2003.12425</loc><lastmod>2020-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mic2mic-using-cycle-consistent-generative-adversarial-networks-to-overcome-microphone-variability-in-speech-systems-2003.12425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mic2mic-using-cycle-consistent-generative-adversarial-networks-to-overcome-microphone-variability-in-speech-systems-2003.12425"/></url>
<url><loc>https://scifaro.com/en/abs/mechanical-classification-of-voice-quality-2003.13033</loc><lastmod>2020-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mechanical-classification-of-voice-quality-2003.13033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mechanical-classification-of-voice-quality-2003.13033"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-speech-adversarial-examples-using-self-attention-u-net-enhancement-2003.13917</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-speech-adversarial-examples-using-self-attention-u-net-enhancement-2003.13917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-speech-adversarial-examples-using-self-attention-u-net-enhancement-2003.13917"/></url>
<url><loc>https://scifaro.com/en/abs/vapar-synth-a-variational-parametric-model-for-audio-synthesis-2004.00001</loc><lastmod>2020-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vapar-synth-a-variational-parametric-model-for-audio-synthesis-2004.00001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vapar-synth-a-variational-parametric-model-for-audio-synthesis-2004.00001"/></url>
<url><loc>https://scifaro.com/en/abs/improved-source-counting-and-separation-for-monaural-mixture-2004.00175</loc><lastmod>2020-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-source-counting-and-separation-for-monaural-mixture-2004.00175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-source-counting-and-separation-for-monaural-mixture-2004.00175"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-differences-between-song-and-speech-emotion-recognition-effect-of-feature-sets-feature-types-and-classifiers-2004.00200</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-differences-between-song-and-speech-emotion-recognition-effect-of-feature-sets-feature-types-and-classifiers-2004.00200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-differences-between-song-and-speech-emotion-recognition-effect-of-feature-sets-feature-types-and-classifiers-2004.00200"/></url>
<url><loc>https://scifaro.com/en/abs/improved-rawnet-with-feature-map-scaling-for-text-independent-speaker-verification-using-raw-waveforms-2004.00526</loc><lastmod>2020-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-rawnet-with-feature-map-scaling-for-text-independent-speaker-verification-using-raw-waveforms-2004.00526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-rawnet-with-feature-map-scaling-for-text-independent-speaker-verification-using-raw-waveforms-2004.00526"/></url>
<url><loc>https://scifaro.com/en/abs/improving-auditory-attention-decoding-performance-of-linear-and-non-linear-methods-using-state-space-model-2004.00910</loc><lastmod>2020-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-auditory-attention-decoding-performance-of-linear-and-non-linear-methods-using-state-space-model-2004.00910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-auditory-attention-decoding-performance-of-linear-and-non-linear-methods-using-state-space-model-2004.00910"/></url>
<url><loc>https://scifaro.com/en/abs/imetricgan-intelligibility-enhancement-for-speech-in-noise-using-generative-adversarial-network-based-metric-learning-2004.00932</loc><lastmod>2020-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imetricgan-intelligibility-enhancement-for-speech-in-noise-using-generative-adversarial-network-based-metric-learning-2004.00932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imetricgan-intelligibility-enhancement-for-speech-in-noise-using-generative-adversarial-network-based-metric-learning-2004.00932"/></url>
<url><loc>https://scifaro.com/en/abs/the-rwth-asr-system-for-ted-lium-release-2-improving-hybrid-hmm-with-specaugment-2004.00960</loc><lastmod>2020-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-rwth-asr-system-for-ted-lium-release-2-improving-hybrid-hmm-with-specaugment-2004.00960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-rwth-asr-system-for-ted-lium-release-2-improving-hybrid-hmm-with-specaugment-2004.00960"/></url>
<url><loc>https://scifaro.com/en/abs/full-sum-decoding-for-hybrid-hmm-based-speech-recognition-using-lstm-language-model-2004.00967</loc><lastmod>2020-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-sum-decoding-for-hybrid-hmm-based-speech-recognition-using-lstm-language-model-2004.00967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-sum-decoding-for-hybrid-hmm-based-speech-recognition-using-lstm-language-model-2004.00967"/></url>
<url><loc>https://scifaro.com/en/abs/towards-relevance-and-sequence-modeling-in-language-recognition-2004.01221</loc><lastmod>2020-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-relevance-and-sequence-modeling-in-language-recognition-2004.01221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-relevance-and-sequence-modeling-in-language-recognition-2004.01221"/></url>
<url><loc>https://scifaro.com/en/abs/ai4covid-19-ai-enabled-preliminary-diagnosis-for-covid-19-from-cough-samples-via-an-app-2004.01275</loc><lastmod>2020-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai4covid-19-ai-enabled-preliminary-diagnosis-for-covid-19-from-cough-samples-via-an-app-2004.01275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai4covid-19-ai-enabled-preliminary-diagnosis-for-covid-19-from-cough-samples-via-an-app-2004.01275"/></url>
<url><loc>https://scifaro.com/en/abs/can-machine-learning-be-used-to-recognize-and-diagnose-coughs-2004.01495</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-machine-learning-be-used-to-recognize-and-diagnose-coughs-2004.01495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-machine-learning-be-used-to-recognize-and-diagnose-coughs-2004.01495"/></url>
<url><loc>https://scifaro.com/en/abs/towards-democratizing-music-production-with-ai-design-of-variational-autoencoder-based-rhythm-generator-as-a-daw-plugin-2004.01525</loc><lastmod>2020-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-democratizing-music-production-with-ai-design-of-variational-autoencoder-based-rhythm-generator-as-a-daw-plugin-2004.01525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-democratizing-music-production-with-ai-design-of-variational-autoencoder-based-rhythm-generator-as-a-daw-plugin-2004.01525"/></url>
<url><loc>https://scifaro.com/en/abs/temporarily-aware-context-modelling-using-generative-adversarial-networks-for-speech-activity-detection-2004.01546</loc><lastmod>2020-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporarily-aware-context-modelling-using-generative-adversarial-networks-for-speech-activity-detection-2004.01546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporarily-aware-context-modelling-using-generative-adversarial-networks-for-speech-activity-detection-2004.01546"/></url>
<url><loc>https://scifaro.com/en/abs/neural-i-vectors-2004.01559</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-i-vectors-2004.01559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-i-vectors-2004.01559"/></url>
<url><loc>https://scifaro.com/en/abs/subband-modeling-for-spoofing-detection-in-automatic-speaker-verification-2004.01922</loc><lastmod>2020-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subband-modeling-for-spoofing-detection-in-automatic-speaker-verification-2004.01922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subband-modeling-for-spoofing-detection-in-automatic-speaker-verification-2004.01922"/></url>
<url><loc>https://scifaro.com/en/abs/using-cyclic-noise-as-the-source-signal-for-neural-source-filter-based-speech-waveform-model-2004.02191</loc><lastmod>2020-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-cyclic-noise-as-the-source-signal-for-neural-source-filter-based-speech-waveform-model-2004.02191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-cyclic-noise-as-the-source-signal-for-neural-source-filter-based-speech-waveform-model-2004.02191"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multilayer-perceptrons-for-dimensional-speech-emotion-recognition-2004.02355</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multilayer-perceptrons-for-dimensional-speech-emotion-recognition-2004.02355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multilayer-perceptrons-for-dimensional-speech-emotion-recognition-2004.02355"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-denoising-and-dereverberation-using-deep-embedding-features-2004.02420</loc><lastmod>2020-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-denoising-and-dereverberation-using-deep-embedding-features-2004.02420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-denoising-and-dereverberation-using-deep-embedding-features-2004.02420"/></url>
<url><loc>https://scifaro.com/en/abs/a-bio-inspired-geometric-model-for-sound-reconstruction-2004.02450</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bio-inspired-geometric-model-for-sound-reconstruction-2004.02450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bio-inspired-geometric-model-for-sound-reconstruction-2004.02450"/></url>
<url><loc>https://scifaro.com/en/abs/vocoder-based-speech-synthesis-from-silent-videos-2004.02541</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocoder-based-speech-synthesis-from-silent-videos-2004.02541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocoder-based-speech-synthesis-from-silent-videos-2004.02541"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-for-short-utterance-speaker-recognition-with-imbalance-length-pairs-2004.02863</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-for-short-utterance-speaker-recognition-with-imbalance-length-pairs-2004.02863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-for-short-utterance-speaker-recognition-with-imbalance-length-pairs-2004.02863"/></url>
<url><loc>https://scifaro.com/en/abs/improving-multi-scale-aggregation-using-feature-pyramid-module-for-robust-speaker-verification-of-variable-duration-utterances-2004.03194</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-multi-scale-aggregation-using-feature-pyramid-module-for-robust-speaker-verification-of-variable-duration-utterances-2004.03194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-multi-scale-aggregation-using-feature-pyramid-module-for-robust-speaker-verification-of-variable-duration-utterances-2004.03194"/></url>
<url><loc>https://scifaro.com/en/abs/universal-adversarial-perturbations-generative-network-for-speaker-recognition-2004.03428</loc><lastmod>2020-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-adversarial-perturbations-generative-network-for-speaker-recognition-2004.03428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-adversarial-perturbations-generative-network-for-speaker-recognition-2004.03428"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-fool-the-speaker-recognition-2004.03434</loc><lastmod>2020-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-fool-the-speaker-recognition-2004.03434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-fool-the-speaker-recognition-2004.03434"/></url>
<url><loc>https://scifaro.com/en/abs/homophone-based-label-smoothing-in-end-to-end-automatic-speech-recognition-2004.03437</loc><lastmod>2020-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/homophone-based-label-smoothing-in-end-to-end-automatic-speech-recognition-2004.03437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/homophone-based-label-smoothing-in-end-to-end-automatic-speech-recognition-2004.03437"/></url>
<url><loc>https://scifaro.com/en/abs/snr-based-features-and-diverse-training-data-for-robust-dnn-based-speech-enhancement-2004.03512</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snr-based-features-and-diverse-training-data-for-robust-dnn-based-speech-enhancement-2004.03512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snr-based-features-and-diverse-training-data-for-robust-dnn-based-speech-enhancement-2004.03512"/></url>
<url><loc>https://scifaro.com/en/abs/from-artificial-neural-networks-to-deep-learning-for-music-generation-history-concepts-and-trends-2004.03586</loc><lastmod>2020-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-artificial-neural-networks-to-deep-learning-for-music-generation-history-concepts-and-trends-2004.03586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-artificial-neural-networks-to-deep-learning-for-music-generation-history-concepts-and-trends-2004.03586"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-voice-conversion-with-cycle-consistent-adversarial-network-2004.03781</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-voice-conversion-with-cycle-consistent-adversarial-network-2004.03781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-voice-conversion-with-cycle-consistent-adversarial-network-2004.03781"/></url>
<url><loc>https://scifaro.com/en/abs/multi-target-emotional-voice-conversion-with-neural-vocoders-2004.03782</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-target-emotional-voice-conversion-with-neural-vocoders-2004.03782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-target-emotional-voice-conversion-with-neural-vocoders-2004.03782"/></url>
<url><loc>https://scifaro.com/en/abs/noise-tokens-learning-neural-noise-templates-for-environment-aware-speech-enhancement-2004.04001</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-tokens-learning-neural-noise-templates-for-environment-aware-speech-enhancement-2004.04001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-tokens-learning-neural-noise-templates-for-environment-aware-speech-enhancement-2004.04001"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-x-vector-bayesian-neural-network-based-x-vector-system-for-speaker-verification-2004.04014</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-x-vector-bayesian-neural-network-based-x-vector-system-for-speaker-verification-2004.04014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-x-vector-bayesian-neural-network-based-x-vector-system-for-speaker-verification-2004.04014"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-singing-voice-separation-for-singing-voice-detection-in-polyphonic-music-2004.04040</loc><lastmod>2022-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-singing-voice-separation-for-singing-voice-detection-in-polyphonic-music-2004.04040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-singing-voice-separation-for-singing-voice-detection-in-polyphonic-music-2004.04040"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-acoustic-and-language-model-training-for-english-isizulu-code-switched-speech-recognition-2004.04054</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-acoustic-and-language-model-training-for-english-isizulu-code-switched-speech-recognition-2004.04054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-acoustic-and-language-model-training-for-english-isizulu-code-switched-speech-recognition-2004.04054"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-moe-based-framework-for-classification-of-respiratory-anomalies-and-lung-disease-detection-2004.04072</loc><lastmod>2020-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-moe-based-framework-for-classification-of-respiratory-anomalies-and-lung-disease-detection-2004.04072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-moe-based-framework-for-classification-of-respiratory-anomalies-and-lung-disease-detection-2004.04072"/></url>
<url><loc>https://scifaro.com/en/abs/deep-normalization-for-speaker-vectors-2004.04095</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-normalization-for-speaker-vectors-2004.04095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-normalization-for-speaker-vectors-2004.04095"/></url>
<url><loc>https://scifaro.com/en/abs/probabilistic-embeddings-for-speaker-diarization-2004.04096</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probabilistic-embeddings-for-speaker-diarization-2004.04096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probabilistic-embeddings-for-speaker-diarization-2004.04096"/></url>
<url><loc>https://scifaro.com/en/abs/wavecrn-an-efficient-convolutional-recurrent-neural-network-for-end-to-end-speech-enhancement-2004.04098</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavecrn-an-efficient-convolutional-recurrent-neural-network-for-end-to-end-speech-enhancement-2004.04098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavecrn-an-efficient-convolutional-recurrent-neural-network-for-end-to-end-speech-enhancement-2004.04098"/></url>
<url><loc>https://scifaro.com/en/abs/keywords-extraction-and-sentiment-analysis-using-automatic-speech-recognition-2004.04099</loc><lastmod>2020-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keywords-extraction-and-sentiment-analysis-using-automatic-speech-recognition-2004.04099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keywords-extraction-and-sentiment-analysis-using-automatic-speech-recognition-2004.04099"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-phone-based-subword-units-for-end-to-end-speech-recognition-2004.04290</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-phone-based-subword-units-for-end-to-end-speech-recognition-2004.04290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-phone-based-subword-units-for-end-to-end-speech-recognition-2004.04290"/></url>
<url><loc>https://scifaro.com/en/abs/mdcnn-sid-multi-scale-dilated-convolution-network-for-singer-identification-2004.04371</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdcnn-sid-multi-scale-dilated-convolution-network-for-singer-identification-2004.04371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdcnn-sid-multi-scale-dilated-convolution-network-for-singer-identification-2004.04371"/></url>
<url><loc>https://scifaro.com/en/abs/att-hack-an-expressive-speech-database-with-social-attitudes-2004.04410</loc><lastmod>2020-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/att-hack-an-expressive-speech-database-with-social-attitudes-2004.04410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/att-hack-an-expressive-speech-database-with-social-attitudes-2004.04410"/></url>
<url><loc>https://scifaro.com/en/abs/fast-frequency-discrimination-and-phoneme-recognition-using-a-biomimetic-membrane-coupled-to-a-neural-network-2004.04459</loc><lastmod>2020-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-frequency-discrimination-and-phoneme-recognition-using-a-biomimetic-membrane-coupled-to-a-neural-network-2004.04459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-frequency-discrimination-and-phoneme-recognition-using-a-biomimetic-membrane-coupled-to-a-neural-network-2004.04459"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-speech-synthesis-using-eeg-2004.04731</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-speech-synthesis-using-eeg-2004.04731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-speech-synthesis-using-eeg-2004.04731"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speech-representations-with-multi-target-autoregressive-predictive-coding-2004.05274</loc><lastmod>2020-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speech-representations-with-multi-target-autoregressive-predictive-coding-2004.05274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speech-representations-with-multi-target-autoregressive-predictive-coding-2004.05274"/></url>
<url><loc>https://scifaro.com/en/abs/from-inference-to-generation-end-to-end-fully-self-supervised-generation-of-human-face-from-speech-2004.05830</loc><lastmod>2020-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-inference-to-generation-end-to-end-fully-self-supervised-generation-of-human-face-from-speech-2004.05830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-inference-to-generation-end-to-end-fully-self-supervised-generation-of-human-face-from-speech-2004.05830"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-using-generative-networks-to-identify-dementia-2004.05989</loc><lastmod>2020-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-using-generative-networks-to-identify-dementia-2004.05989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-using-generative-networks-to-identify-dementia-2004.05989"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-model-and-optimal-si-snr-for-monaural-multi-speaker-speech-separation-in-noisy-environment-2004.06332</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-model-and-optimal-si-snr-for-monaural-multi-speaker-speech-separation-in-noisy-environment-2004.06332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-model-and-optimal-si-snr-for-monaural-multi-speaker-speech-separation-in-noisy-environment-2004.06332"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-grapheme-to-phoneme-conversion-2004.06338</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-grapheme-to-phoneme-conversion-2004.06338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-grapheme-to-phoneme-conversion-2004.06338"/></url>
<url><loc>https://scifaro.com/en/abs/an-explainability-study-of-the-constant-q-cepstral-coefficient-spoofing-countermeasure-for-automatic-speaker-verification-2004.06422</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-explainability-study-of-the-constant-q-cepstral-coefficient-spoofing-countermeasure-for-automatic-speaker-verification-2004.06422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-explainability-study-of-the-constant-q-cepstral-coefficient-spoofing-countermeasure-for-automatic-speaker-verification-2004.06422"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-acoustic-modelling-for-five-lingual-code-switched-asr-using-automatically-segmented-soap-opera-speech-2004.06480</loc><lastmod>2020-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-acoustic-modelling-for-five-lingual-code-switched-asr-using-automatically-segmented-soap-opera-speech-2004.06480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-acoustic-modelling-for-five-lingual-code-switched-asr-using-automatically-segmented-soap-opera-speech-2004.06480"/></url>
<url><loc>https://scifaro.com/en/abs/the-hearpiece-database-of-individual-transfer-functions-of-an-openly-available-in-the-ear-earpiece-for-hearing-device-research-2004.06579</loc><lastmod>2020-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hearpiece-database-of-individual-transfer-functions-of-an-openly-available-in-the-ear-earpiece-for-hearing-device-research-2004.06579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hearpiece-database-of-individual-transfer-functions-of-an-openly-available-in-the-ear-earpiece-for-hearing-device-research-2004.06579"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-with-lexical-information-2004.06756</loc><lastmod>2020-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-with-lexical-information-2004.06756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-with-lexical-information-2004.06756"/></url>
<url><loc>https://scifaro.com/en/abs/alzheimer-s-dementia-recognition-through-spontaneous-speech-the-adress-challenge-2004.06833</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alzheimer-s-dementia-recognition-through-spontaneous-speech-the-adress-challenge-2004.06833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alzheimer-s-dementia-recognition-through-spontaneous-speech-the-adress-challenge-2004.06833"/></url>
<url><loc>https://scifaro.com/en/abs/f0-consistent-many-to-many-non-parallel-voice-conversion-via-conditional-autoencoder-2004.07370</loc><lastmod>2020-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f0-consistent-many-to-many-non-parallel-voice-conversion-via-conditional-autoencoder-2004.07370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f0-consistent-many-to-many-non-parallel-voice-conversion-via-conditional-autoencoder-2004.07370"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-and-data-driven-amplitude-spectrum-prediction-for-hierarchical-neural-vocoders-2004.07832</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-and-data-driven-amplitude-spectrum-prediction-for-hierarchical-neural-vocoders-2004.07832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-and-data-driven-amplitude-spectrum-prediction-for-hierarchical-neural-vocoders-2004.07832"/></url>
<url><loc>https://scifaro.com/en/abs/sound-of-guns-digital-forensics-of-gun-audio-samples-meets-artificial-intelligence-2004.07948</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-of-guns-digital-forensics-of-gun-audio-samples-meets-artificial-intelligence-2004.07948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-of-guns-digital-forensics-of-gun-audio-samples-meets-artificial-intelligence-2004.07948"/></url>
<url><loc>https://scifaro.com/en/abs/speech-paralinguistic-approach-for-detecting-dementia-using-gated-convolutional-neural-network-2004.07992</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-paralinguistic-approach-for-detecting-dementia-using-gated-convolutional-neural-network-2004.07992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-paralinguistic-approach-for-detecting-dementia-using-gated-convolutional-neural-network-2004.07992"/></url>
<url><loc>https://scifaro.com/en/abs/acoustical-classification-of-different-speech-acts-using-nonlinear-methods-2004.08248</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustical-classification-of-different-speech-acts-using-nonlinear-methods-2004.08248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustical-classification-of-different-speech-acts-using-nonlinear-methods-2004.08248"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-teach-dnns-to-pay-attention-to-the-visual-modality-in-speech-recognition-2004.08250</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-teach-dnns-to-pay-attention-to-the-visual-modality-in-speech-recognition-2004.08250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-teach-dnns-to-pay-attention-to-the-visual-modality-in-speech-recognition-2004.08250"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-for-respiratory-sound-classification-in-wearable-devices-enabled-by-patient-specific-model-tuning-2004.08287</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-for-respiratory-sound-classification-in-wearable-devices-enabled-by-patient-specific-model-tuning-2004.08287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-for-respiratory-sound-classification-in-wearable-devices-enabled-by-patient-specific-model-tuning-2004.08287"/></url>
<url><loc>https://scifaro.com/en/abs/spex-multi-scale-time-domain-speaker-extraction-network-2004.08326</loc><lastmod>2020-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spex-multi-scale-time-domain-speaker-extraction-network-2004.08326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spex-multi-scale-time-domain-speaker-extraction-network-2004.08326"/></url>
<url><loc>https://scifaro.com/en/abs/matchboxnet-1d-time-channel-separable-convolutional-neural-network-architecture-for-speech-commands-recognition-2004.08531</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matchboxnet-1d-time-channel-separable-convolutional-neural-network-architecture-for-speech-commands-recognition-2004.08531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matchboxnet-1d-time-channel-separable-convolutional-neural-network-architecture-for-speech-commands-recognition-2004.08531"/></url>
<url><loc>https://scifaro.com/en/abs/the-attacker-s-perspective-on-automatic-speaker-verification-an-overview-2004.08849</loc><lastmod>2020-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-attacker-s-perspective-on-automatic-speaker-verification-an-overview-2004.08849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-attacker-s-perspective-on-automatic-speaker-verification-an-overview-2004.08849"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-whisper-to-natural-speech-conversion-using-modified-transformer-network-2004.09347</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-whisper-to-natural-speech-conversion-using-modified-transformer-network-2004.09347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-whisper-to-natural-speech-conversion-using-modified-transformer-network-2004.09347"/></url>
<url><loc>https://scifaro.com/en/abs/language-agnostic-multilingual-modeling-2004.09571</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-agnostic-multilingual-modeling-2004.09571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-agnostic-multilingual-modeling-2004.09571"/></url>
<url><loc>https://scifaro.com/en/abs/visqol-v3-an-open-source-production-ready-objective-speech-and-audio-metric-2004.09584</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visqol-v3-an-open-source-production-ready-objective-speech-and-audio-metric-2004.09584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visqol-v3-an-open-source-production-ready-objective-speech-and-audio-metric-2004.09584"/></url>
<url><loc>https://scifaro.com/en/abs/data-processing-for-optimizing-naturalness-of-vietnamese-text-to-speech-system-2004.09607</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-processing-for-optimizing-naturalness-of-vietnamese-text-to-speech-system-2004.09607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-processing-for-optimizing-naturalness-of-vietnamese-text-to-speech-system-2004.09607"/></url>
<url><loc>https://scifaro.com/en/abs/vector-quantized-contrastive-predictive-coding-for-template-based-music-generation-2004.10120</loc><lastmod>2020-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-quantized-contrastive-predictive-coding-for-template-based-music-generation-2004.10120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-quantized-contrastive-predictive-coding-for-template-based-music-generation-2004.10120"/></url>
<url><loc>https://scifaro.com/en/abs/music-generation-with-temporal-structure-augmentation-2004.10246</loc><lastmod>2020-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-generation-with-temporal-structure-augmentation-2004.10246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-generation-with-temporal-structure-augmentation-2004.10246"/></url>
<url><loc>https://scifaro.com/en/abs/towards-linking-the-lakh-and-imslp-datasets-2004.10391</loc><lastmod>2020-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-linking-the-lakh-and-imslp-datasets-2004.10391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-linking-the-lakh-and-imslp-datasets-2004.10391"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-competitive-end-to-end-speech-recognition-for-chime-6-dinner-party-transcription-2004.10799</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-competitive-end-to-end-speech-recognition-for-chime-6-dinner-party-transcription-2004.10799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-competitive-end-to-end-speech-recognition-for-chime-6-dinner-party-transcription-2004.10799"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-sequential-modeling-for-deep-gaussian-process-based-speech-synthesis-using-simple-recurrent-unit-2004.10823</loc><lastmod>2020-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-sequential-modeling-for-deep-gaussian-process-based-speech-synthesis-using-simple-recurrent-unit-2004.10823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-sequential-modeling-for-deep-gaussian-process-based-speech-synthesis-using-simple-recurrent-unit-2004.10823"/></url>
<url><loc>https://scifaro.com/en/abs/bytesing-a-chinese-singing-voice-synthesis-system-using-duration-allocated-encoder-decoder-acoustic-models-and-wavernn-vocoders-2004.11012</loc><lastmod>2021-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bytesing-a-chinese-singing-voice-synthesis-system-using-duration-allocated-encoder-decoder-acoustic-models-and-wavernn-vocoders-2004.11012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bytesing-a-chinese-singing-voice-synthesis-system-using-duration-allocated-encoder-decoder-acoustic-models-and-wavernn-vocoders-2004.11012"/></url>
<url><loc>https://scifaro.com/en/abs/flexible-framework-for-audio-reconstruction-2004.11162</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexible-framework-for-audio-reconstruction-2004.11162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexible-framework-for-audio-reconstruction-2004.11162"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-decomposition-via-triple-information-bottleneck-2004.11284</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-decomposition-via-triple-information-bottleneck-2004.11284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-decomposition-via-triple-information-bottleneck-2004.11284"/></url>
<url><loc>https://scifaro.com/en/abs/towards-fast-and-accurate-streaming-end-to-end-asr-2004.11544</loc><lastmod>2020-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-fast-and-accurate-streaming-end-to-end-asr-2004.11544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-fast-and-accurate-streaming-end-to-end-asr-2004.11544"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-audio-source-remixing-with-microphone-array-listening-devices-2004.11956</loc><lastmod>2020-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-audio-source-remixing-with-microphone-array-listening-devices-2004.11956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-audio-source-remixing-with-microphone-array-listening-devices-2004.11956"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-utilizing-graph-laplacian-regularization-with-event-co-occurrence-2004.12046</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-utilizing-graph-laplacian-regularization-with-event-co-occurrence-2004.12046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-utilizing-graph-laplacian-regularization-with-event-co-occurrence-2004.12046"/></url>
<url><loc>https://scifaro.com/en/abs/active-voice-authentication-2004.12071</loc><lastmod>2020-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-voice-authentication-2004.12071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-voice-authentication-2004.12071"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-fast-and-universal-audio-adversarial-attack-using-generative-model-2004.12261</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-fast-and-universal-audio-adversarial-attack-using-generative-model-2004.12261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-fast-and-universal-audio-adversarial-attack-using-generative-model-2004.12261"/></url>
<url><loc>https://scifaro.com/en/abs/time-frequency-analysis-and-parameterisation-of-knee-sounds-for-non-invasive-detection-of-osteoarthritis-2004.12745</loc><lastmod>2020-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-frequency-analysis-and-parameterisation-of-knee-sounds-for-non-invasive-detection-of-osteoarthritis-2004.12745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-frequency-analysis-and-parameterisation-of-knee-sounds-for-non-invasive-detection-of-osteoarthritis-2004.12745"/></url>
<url><loc>https://scifaro.com/en/abs/autoencoding-neural-networks-as-musical-audio-synthesizers-2004.13172</loc><lastmod>2020-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoencoding-neural-networks-as-musical-audio-synthesizers-2004.13172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoencoding-neural-networks-as-musical-audio-synthesizers-2004.13172"/></url>
<url><loc>https://scifaro.com/en/abs/l-vector-neural-label-embedding-for-domain-adaptation-2004.13480</loc><lastmod>2020-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l-vector-neural-label-embedding-for-domain-adaptation-2004.13480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l-vector-neural-label-embedding-for-domain-adaptation-2004.13480"/></url>
<url><loc>https://scifaro.com/en/abs/detect-language-of-transliterated-texts-2004.13521</loc><lastmod>2020-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detect-language-of-transliterated-texts-2004.13521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detect-language-of-transliterated-texts-2004.13521"/></url>
<url><loc>https://scifaro.com/en/abs/research-on-modeling-units-of-transformer-transducer-for-mandarin-speech-recognition-2004.13522</loc><lastmod>2020-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/research-on-modeling-units-of-transformer-transducer-for-mandarin-speech-recognition-2004.13522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/research-on-modeling-units-of-transformer-transducer-for-mandarin-speech-recognition-2004.13522"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-separation-using-spatially-distributed-microphones-2004.13670</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-separation-using-spatially-distributed-microphones-2004.13670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-separation-using-spatially-distributed-microphones-2004.13670"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-spoken-digit-generation-with-stylegan-2004.13764</loc><lastmod>2020-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-spoken-digit-generation-with-stylegan-2004.13764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-spoken-digit-generation-with-stylegan-2004.13764"/></url>
<url><loc>https://scifaro.com/en/abs/determined-bss-based-on-time-frequency-masking-and-its-application-to-harmonic-vector-analysis-2004.14091</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/determined-bss-based-on-time-frequency-masking-and-its-application-to-harmonic-vector-analysis-2004.14091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/determined-bss-based-on-time-frequency-masking-and-its-application-to-harmonic-vector-analysis-2004.14091"/></url>
<url><loc>https://scifaro.com/en/abs/copycat-many-to-many-fine-grained-prosody-transfer-for-neural-text-to-speech-2004.14617</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/copycat-many-to-many-fine-grained-prosody-transfer-for-neural-text-to-speech-2004.14617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/copycat-many-to-many-fine-grained-prosody-transfer-for-neural-text-to-speech-2004.14617"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speaker-extraction-network-2004.14762</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speaker-extraction-network-2004.14762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speaker-extraction-network-2004.14762"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-neural-network-model-of-human-cochlear-mechanics-and-filter-tuning-for-real-time-applications-2004.14832</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-neural-network-model-of-human-cochlear-mechanics-and-filter-tuning-for-real-time-applications-2004.14832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-neural-network-model-of-human-cochlear-mechanics-and-filter-tuning-for-real-time-applications-2004.14832"/></url>
<url><loc>https://scifaro.com/en/abs/multiresolution-and-multimodal-speech-recognition-with-transformers-2004.14840</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiresolution-and-multimodal-speech-recognition-with-transformers-2004.14840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiresolution-and-multimodal-speech-recognition-with-transformers-2004.14840"/></url>
<url><loc>https://scifaro.com/en/abs/robust-phonetic-segmentation-using-spectral-transition-measure-for-non-standard-recording-environments-2004.14859</loc><lastmod>2020-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-phonetic-segmentation-using-spectral-transition-measure-for-non-standard-recording-environments-2004.14859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-phonetic-segmentation-using-spectral-transition-measure-for-non-standard-recording-environments-2004.14859"/></url>
<url><loc>https://scifaro.com/en/abs/an-early-study-on-intelligent-analysis-of-speech-under-covid-19-severity-sleep-quality-fatigue-and-anxiety-2005.00096</loc><lastmod>2020-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-early-study-on-intelligent-analysis-of-speech-under-covid-19-severity-sleep-quality-fatigue-and-anxiety-2005.00096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-early-study-on-intelligent-analysis-of-speech-under-covid-19-severity-sleep-quality-fatigue-and-anxiety-2005.00096"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-acoustic-scene-classification-using-band-wise-statistics-matching-2005.00145</loc><lastmod>2020-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-acoustic-scene-classification-using-band-wise-statistics-matching-2005.00145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-acoustic-scene-classification-using-band-wise-statistics-matching-2005.00145"/></url>
<url><loc>https://scifaro.com/en/abs/jukebox-a-generative-model-for-music-2005.00341</loc><lastmod>2020-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jukebox-a-generative-model-for-music-2005.00341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jukebox-a-generative-model-for-music-2005.00341"/></url>
<url><loc>https://scifaro.com/en/abs/can-speaker-augmentation-improve-multi-speaker-end-to-end-tts-2005.01245</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-speaker-augmentation-improve-multi-speaker-end-to-end-tts-2005.01245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-speaker-augmentation-improve-multi-speaker-end-to-end-tts-2005.01245"/></url>
<url><loc>https://scifaro.com/en/abs/noise2weight-on-detecting-payload-weight-from-drones-acoustic-emissions-2005.01347</loc><lastmod>2020-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise2weight-on-detecting-payload-weight-from-drones-acoustic-emissions-2005.01347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise2weight-on-detecting-payload-weight-from-drones-acoustic-emissions-2005.01347"/></url>
<url><loc>https://scifaro.com/en/abs/does-visual-self-supervision-improve-learning-of-speech-representations-for-emotion-recognition-2005.01400</loc><lastmod>2021-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-visual-self-supervision-improve-learning-of-speech-representations-for-emotion-recognition-2005.01400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-visual-self-supervision-improve-learning-of-speech-representations-for-emotion-recognition-2005.01400"/></url>
<url><loc>https://scifaro.com/en/abs/approximal-operator-with-application-to-audio-inpainting-2005.01437</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approximal-operator-with-application-to-audio-inpainting-2005.01437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approximal-operator-with-application-to-audio-inpainting-2005.01437"/></url>
<url><loc>https://scifaro.com/en/abs/contextnet-improving-convolutional-neural-networks-for-automatic-speech-recognition-with-global-context-2005.03191</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextnet-improving-convolutional-neural-networks-for-automatic-speech-recognition-with-global-context-2005.03191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextnet-improving-convolutional-neural-networks-for-automatic-speech-recognition-with-global-context-2005.03191"/></url>
<url><loc>https://scifaro.com/en/abs/autospeech-neural-architecture-search-for-speaker-recognition-2005.03215</loc><lastmod>2020-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autospeech-neural-architecture-search-for-speaker-recognition-2005.03215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autospeech-neural-architecture-search-for-speaker-recognition-2005.03215"/></url>
<url><loc>https://scifaro.com/en/abs/rnn-t-models-fail-to-generalize-to-out-of-domain-audio-causes-and-solutions-2005.03271</loc><lastmod>2020-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rnn-t-models-fail-to-generalize-to-out-of-domain-audio-causes-and-solutions-2005.03271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rnn-t-models-fail-to-generalize-to-out-of-domain-audio-causes-and-solutions-2005.03271"/></url>
<url><loc>https://scifaro.com/en/abs/cotatron-transcription-guided-speech-encoder-for-any-to-many-voice-conversion-without-parallel-data-2005.03295</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cotatron-transcription-guided-speech-encoder-for-any-to-many-voice-conversion-without-parallel-data-2005.03295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cotatron-transcription-guided-speech-encoder-for-any-to-many-voice-conversion-without-parallel-data-2005.03295"/></url>
<url><loc>https://scifaro.com/en/abs/segment-aggregation-for-short-utterances-speaker-verification-using-raw-waveforms-2005.03329</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segment-aggregation-for-short-utterances-speaker-verification-using-raw-waveforms-2005.03329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segment-aggregation-for-short-utterances-speaker-verification-using-raw-waveforms-2005.03329"/></url>
<url><loc>https://scifaro.com/en/abs/scyclone-high-quality-and-parallel-data-free-voice-conversion-using-spectrogram-and-cycle-consistent-adversarial-networks-2005.03334</loc><lastmod>2020-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scyclone-high-quality-and-parallel-data-free-voice-conversion-using-spectrogram-and-cycle-consistent-adversarial-networks-2005.03334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scyclone-high-quality-and-parallel-data-free-voice-conversion-using-spectrogram-and-cycle-consistent-adversarial-networks-2005.03334"/></url>
<url><loc>https://scifaro.com/en/abs/domain-aware-training-for-far-field-small-footprint-keyword-spotting-2005.03633</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-aware-training-for-far-field-small-footprint-keyword-spotting-2005.03633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-aware-training-for-far-field-small-footprint-keyword-spotting-2005.03633"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-network-for-noise-robust-keyword-spotting-and-speaker-verification-using-ctc-based-soft-vad-and-global-query-attention-2005.03867</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-network-for-noise-robust-keyword-spotting-and-speaker-verification-using-ctc-based-soft-vad-and-global-query-attention-2005.03867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-network-for-noise-robust-keyword-spotting-and-speaker-verification-using-ctc-based-soft-vad-and-global-query-attention-2005.03867"/></url>
<url><loc>https://scifaro.com/en/abs/neural-spatio-temporal-beamformer-for-target-speech-separation-2005.03889</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-spatio-temporal-beamformer-for-target-speech-separation-2005.03889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-spatio-temporal-beamformer-for-target-speech-separation-2005.03889"/></url>
<url><loc>https://scifaro.com/en/abs/asteroid-the-pytorch-based-audio-source-separation-toolkit-for-researchers-2005.04132</loc><lastmod>2020-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asteroid-the-pytorch-based-audio-source-separation-toolkit-for-researchers-2005.04132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asteroid-the-pytorch-based-audio-source-separation-toolkit-for-researchers-2005.04132"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-learning-for-end-to-end-automatic-speech-recognition-2005.04288</loc><lastmod>2021-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-learning-for-end-to-end-automatic-speech-recognition-2005.04288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-learning-for-end-to-end-automatic-speech-recognition-2005.04288"/></url>
<url><loc>https://scifaro.com/en/abs/cross-language-transfer-learning-continuous-learning-and-domain-adaptation-for-end-to-end-automatic-speech-recognition-2005.04290</loc><lastmod>2020-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-language-transfer-learning-continuous-learning-and-domain-adaptation-for-end-to-end-automatic-speech-recognition-2005.04290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-language-transfer-learning-continuous-learning-and-domain-adaptation-for-end-to-end-automatic-speech-recognition-2005.04290"/></url>
<url><loc>https://scifaro.com/en/abs/u-net-based-direct-path-dominance-test-for-robust-direction-of-arrival-estimation-2005.04376</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-net-based-direct-path-dominance-test-for-robust-direction-of-arrival-estimation-2005.04376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-net-based-direct-path-dominance-test-for-robust-direction-of-arrival-estimation-2005.04376"/></url>
<url><loc>https://scifaro.com/en/abs/from-speaker-verification-to-multispeaker-speech-synthesis-deep-transfer-with-feedback-constraint-2005.04587</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-speaker-verification-to-multispeaker-speech-synthesis-deep-transfer-with-feedback-constraint-2005.04587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-speaker-verification-to-multispeaker-speech-synthesis-deep-transfer-with-feedback-constraint-2005.04587"/></url>
<url><loc>https://scifaro.com/en/abs/spex-a-complete-time-domain-speaker-extraction-network-2005.04686</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spex-a-complete-time-domain-speaker-extraction-network-2005.04686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spex-a-complete-time-domain-speaker-extraction-network-2005.04686"/></url>
<url><loc>https://scifaro.com/en/abs/listen-attentively-and-spell-once-whole-sentence-generation-via-a-non-autoregressive-architecture-for-low-latency-speech-recognition-2005.04862</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-attentively-and-spell-once-whole-sentence-generation-via-a-non-autoregressive-architecture-for-low-latency-speech-recognition-2005.04862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-attentively-and-spell-once-whole-sentence-generation-via-a-non-autoregressive-architecture-for-low-latency-speech-recognition-2005.04862"/></url>
<url><loc>https://scifaro.com/en/abs/tts-portuguese-corpus-a-corpus-for-speech-synthesis-in-brazilian-portuguese-2005.05144</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-portuguese-corpus-a-corpus-for-speech-synthesis-in-brazilian-portuguese-2005.05144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-portuguese-corpus-a-corpus-for-speech-synthesis-in-brazilian-portuguese-2005.05144"/></url>
<url><loc>https://scifaro.com/en/abs/audio-and-contact-microphones-for-cough-detection-2005.05313</loc><lastmod>2020-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-and-contact-microphones-for-cough-detection-2005.05313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-and-contact-microphones-for-cough-detection-2005.05313"/></url>
<url><loc>https://scifaro.com/en/abs/talknet-fully-convolutional-non-autoregressive-speech-synthesis-model-2005.05514</loc><lastmod>2020-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/talknet-fully-convolutional-non-autoregressive-speech-synthesis-model-2005.05514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/talknet-fully-convolutional-non-autoregressive-speech-synthesis-model-2005.05514"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-estimation-of-intelligibility-measure-for-consonants-in-speech-2005.06065</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-estimation-of-intelligibility-measure-for-consonants-in-speech-2005.06065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-estimation-of-intelligibility-measure-for-consonants-in-speech-2005.06065"/></url>
<url><loc>https://scifaro.com/en/abs/memory-controlled-sequential-self-attention-for-sound-recognition-2005.06650</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-controlled-sequential-self-attention-for-sound-recognition-2005.06650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-controlled-sequential-self-attention-for-sound-recognition-2005.06650"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-keyword-spotting-on-mobile-devices-2005.06720</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-keyword-spotting-on-mobile-devices-2005.06720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-keyword-spotting-on-mobile-devices-2005.06720"/></url>
<url><loc>https://scifaro.com/en/abs/consonant-gemination-in-italian-the-affricate-and-fricative-case-2005.06959</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consonant-gemination-in-italian-the-affricate-and-fricative-case-2005.06959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consonant-gemination-in-italian-the-affricate-and-fricative-case-2005.06959"/></url>
<url><loc>https://scifaro.com/en/abs/consonant-gemination-in-italian-the-nasal-and-liquid-case-2005.06960</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consonant-gemination-in-italian-the-nasal-and-liquid-case-2005.06960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consonant-gemination-in-italian-the-nasal-and-liquid-case-2005.06960"/></url>
<url><loc>https://scifaro.com/en/abs/foreground-background-ambient-sound-scene-separation-2005.07006</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foreground-background-ambient-sound-scene-separation-2005.07006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foreground-background-ambient-sound-scene-separation-2005.07006"/></url>
<url><loc>https://scifaro.com/en/abs/darts-asr-differentiable-architecture-search-for-multilingual-speech-recognition-and-adaptation-2005.07029</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/darts-asr-differentiable-architecture-search-for-multilingual-speech-recognition-and-adaptation-2005.07029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/darts-asr-differentiable-architecture-search-for-multilingual-speech-recognition-and-adaptation-2005.07029"/></url>
<url><loc>https://scifaro.com/en/abs/infant-crying-detection-in-real-world-environments-2005.07036</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infant-crying-detection-in-real-world-environments-2005.07036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infant-crying-detection-in-real-world-environments-2005.07036"/></url>
<url><loc>https://scifaro.com/en/abs/vibration-analysis-in-bearings-for-failure-prevention-using-cnn-2005.07057</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vibration-analysis-in-bearings-for-failure-prevention-using-cnn-2005.07057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vibration-analysis-in-bearings-for-failure-prevention-using-cnn-2005.07057"/></url>
<url><loc>https://scifaro.com/en/abs/ecapa-tdnn-emphasized-channel-attention-propagation-and-aggregation-in-tdnn-based-speaker-verification-2005.07143</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecapa-tdnn-emphasized-channel-attention-propagation-and-aggregation-in-tdnn-based-speaker-verification-2005.07143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecapa-tdnn-emphasized-channel-attention-propagation-and-aggregation-in-tdnn-based-speaker-verification-2005.07143"/></url>
<url><loc>https://scifaro.com/en/abs/you-do-not-need-more-data-improving-end-to-end-speech-recognition-by-text-to-speech-data-augmentation-2005.07157</loc><lastmod>2020-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/you-do-not-need-more-data-improving-end-to-end-speech-recognition-by-text-to-speech-data-augmentation-2005.07157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/you-do-not-need-more-data-improving-end-to-end-speech-recognition-by-text-to-speech-data-augmentation-2005.07157"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-voice-activity-detection-a-novel-approach-for-multi-speaker-diarization-in-a-dinner-party-scenario-2005.07272</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-a-novel-approach-for-multi-speaker-diarization-in-a-dinner-party-scenario-2005.07272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-a-novel-approach-for-multi-speaker-diarization-in-a-dinner-party-scenario-2005.07272"/></url>
<url><loc>https://scifaro.com/en/abs/on-bottleneck-features-for-text-dependent-speaker-verification-using-x-vectors-2005.07383</loc><lastmod>2020-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-bottleneck-features-for-text-dependent-speaker-verification-using-x-vectors-2005.07383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-bottleneck-features-for-text-dependent-speaker-verification-using-x-vectors-2005.07383"/></url>
<url><loc>https://scifaro.com/en/abs/wg-wavenet-real-time-high-fidelity-speech-synthesis-without-gpu-2005.07412</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wg-wavenet-real-time-high-fidelity-speech-synthesis-without-gpu-2005.07412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wg-wavenet-real-time-high-fidelity-speech-synthesis-without-gpu-2005.07412"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-neural-networks-for-class-activity-detection-2005.07549</loc><lastmod>2020-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-neural-networks-for-class-activity-detection-2005.07549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-neural-networks-for-class-activity-detection-2005.07549"/></url>
<url><loc>https://scifaro.com/en/abs/dual-signal-transformation-lstm-network-for-real-time-noise-suppression-2005.07551</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-signal-transformation-lstm-network-for-real-time-noise-suppression-2005.07551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-signal-transformation-lstm-network-for-real-time-noise-suppression-2005.07551"/></url>
<url><loc>https://scifaro.com/en/abs/context-dependent-acoustic-modeling-without-explicit-phone-clustering-2005.07578</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-dependent-acoustic-modeling-without-explicit-phone-clustering-2005.07578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-dependent-acoustic-modeling-without-explicit-phone-clustering-2005.07578"/></url>
<url><loc>https://scifaro.com/en/abs/an-auto-encoder-for-audio-dolphin-communication-2005.07623</loc><lastmod>2020-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-auto-encoder-for-audio-dolphin-communication-2005.07623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-auto-encoder-for-audio-dolphin-communication-2005.07623"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-residual-echo-suppression-based-on-multi-stream-conv-tasnet-2005.07631</loc><lastmod>2020-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-residual-echo-suppression-based-on-multi-stream-conv-tasnet-2005.07631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-residual-echo-suppression-based-on-multi-stream-conv-tasnet-2005.07631"/></url>
<url><loc>https://scifaro.com/en/abs/i-have-vxxx-bxx-connexxxn-facing-packet-loss-in-deep-speech-emotion-recognition-2005.07757</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-have-vxxx-bxx-connexxxn-facing-packet-loss-in-deep-speech-emotion-recognition-2005.07757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-have-vxxx-bxx-connexxxn-facing-packet-loss-in-deep-speech-emotion-recognition-2005.07757"/></url>
<url><loc>https://scifaro.com/en/abs/concealnet-an-end-to-end-neural-network-for-packet-loss-concealment-in-deep-speech-emotion-recognition-2005.07777</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concealnet-an-end-to-end-neural-network-for-packet-loss-concealment-in-deep-speech-emotion-recognition-2005.07777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concealnet-an-end-to-end-neural-network-for-packet-loss-concealment-in-deep-speech-emotion-recognition-2005.07777"/></url>
<url><loc>https://scifaro.com/en/abs/reliable-local-explanations-for-machine-listening-2005.07788</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reliable-local-explanations-for-machine-listening-2005.07788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reliable-local-explanations-for-machine-listening-2005.07788"/></url>
<url><loc>https://scifaro.com/en/abs/on-deep-speech-packet-loss-concealment-a-mini-survey-2005.07794</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-deep-speech-packet-loss-concealment-a-mini-survey-2005.07794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-deep-speech-packet-loss-concealment-a-mini-survey-2005.07794"/></url>
<url><loc>https://scifaro.com/en/abs/jdi-t-jointly-trained-duration-informed-transformer-for-text-to-speech-without-explicit-alignment-2005.07799</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jdi-t-jointly-trained-duration-informed-transformer-for-text-to-speech-without-explicit-alignment-2005.07799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jdi-t-jointly-trained-duration-informed-transformer-for-text-to-speech-without-explicit-alignment-2005.07799"/></url>
<url><loc>https://scifaro.com/en/abs/feature-fusion-strategies-for-end-to-end-evaluation-of-cognitive-behavior-therapy-sessions-2005.07809</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-fusion-strategies-for-end-to-end-evaluation-of-cognitive-behavior-therapy-sessions-2005.07809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-fusion-strategies-for-end-to-end-evaluation-of-cognitive-behavior-therapy-sessions-2005.07809"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-cross-domain-speech-to-speech-conversion-with-time-frequency-consistency-2005.07810</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-cross-domain-speech-to-speech-conversion-with-time-frequency-consistency-2005.07810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-cross-domain-speech-to-speech-conversion-with-time-frequency-consistency-2005.07810"/></url>
<url><loc>https://scifaro.com/en/abs/convoice-real-time-zero-shot-voice-style-transfer-with-convolutional-network-2005.07815</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convoice-real-time-zero-shot-voice-style-transfer-with-convolutional-network-2005.07815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convoice-real-time-zero-shot-voice-style-transfer-with-convolutional-network-2005.07815"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-training-of-hierarchical-attention-networks-for-speaker-identification-2005.07817</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-training-of-hierarchical-attention-networks-for-speaker-identification-2005.07817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-training-of-hierarchical-attention-networks-for-speaker-identification-2005.07817"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-re-identification-with-speaker-dependent-speech-enhancement-2005.07818</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-re-identification-with-speaker-dependent-speech-enhancement-2005.07818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-re-identification-with-speaker-dependent-speech-enhancement-2005.07818"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-weakly-and-semi-supervised-learning-for-low-resource-video-asr-2005.07850</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-weakly-and-semi-supervised-learning-for-low-resource-video-asr-2005.07850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-weakly-and-semi-supervised-learning-for-low-resource-video-asr-2005.07850"/></url>
<url><loc>https://scifaro.com/en/abs/improved-prosody-from-learned-f0-codebook-representations-for-vq-vae-speech-waveform-reconstruction-2005.07884</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-prosody-from-learned-f0-codebook-representations-for-vq-vae-speech-waveform-reconstruction-2005.07884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-prosody-from-learned-f0-codebook-representations-for-vq-vae-speech-waveform-reconstruction-2005.07884"/></url>
<url><loc>https://scifaro.com/en/abs/spike-triggered-non-autoregressive-transformer-for-end-to-end-speech-recognition-2005.07903</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spike-triggered-non-autoregressive-transformer-for-end-to-end-speech-recognition-2005.07903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spike-triggered-non-autoregressive-transformer-for-end-to-end-speech-recognition-2005.07903"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-spelling-inconsistencies-in-code-switching-asr-using-contextualized-ctc-loss-2005.07920</loc><lastmod>2021-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-spelling-inconsistencies-in-code-switching-asr-using-contextualized-ctc-loss-2005.07920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-spelling-inconsistencies-in-code-switching-asr-using-contextualized-ctc-loss-2005.07920"/></url>
<url><loc>https://scifaro.com/en/abs/accentdb-a-database-of-non-native-english-accents-to-assist-neural-speech-recognition-2005.07973</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accentdb-a-database-of-non-native-english-accents-to-assist-neural-speech-recognition-2005.07973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accentdb-a-database-of-non-native-english-accents-to-assist-neural-speech-recognition-2005.07973"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-extraction-based-on-blind-source-separation-and-x-vector-based-speaker-selection-trained-with-data-augmentation-2005.07976</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-extraction-based-on-blind-source-separation-and-x-vector-based-speaker-selection-trained-with-data-augmentation-2005.07976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-extraction-based-on-blind-source-separation-and-x-vector-based-speaker-selection-trained-with-data-augmentation-2005.07976"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-for-multi-speaker-text-to-speech-synthesis-using-discrete-speech-representation-2005.08024</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-for-multi-speaker-text-to-speech-synthesis-using-discrete-speech-representation-2005.08024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-for-multi-speaker-text-to-speech-synthesis-using-discrete-speech-representation-2005.08024"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-transformer-based-acoustic-models-using-self-attention-with-augmented-memory-2005.08042</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-transformer-based-acoustic-models-using-self-attention-with-augmented-memory-2005.08042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-transformer-based-acoustic-models-using-self-attention-with-augmented-memory-2005.08042"/></url>
<url><loc>https://scifaro.com/en/abs/the-interspeech-2020-far-field-speaker-verification-challenge-2005.08046</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interspeech-2020-far-field-speaker-verification-challenge-2005.08046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interspeech-2020-far-field-speaker-verification-challenge-2005.08046"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-audio-quality-assessment-and-anomaly-localisation-using-attention-models-2005.08053</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-audio-quality-assessment-and-anomaly-localisation-using-attention-models-2005.08053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-audio-quality-assessment-and-anomaly-localisation-using-attention-models-2005.08053"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-and-multi-speaker-diarization-of-long-conversations-2005.08072</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-and-multi-speaker-diarization-of-long-conversations-2005.08072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-and-multi-speaker-diarization-of-long-conversations-2005.08072"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-convolution-augmented-transformer-for-speech-recognition-2005.08100</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-convolution-augmented-transformer-for-speech-recognition-2005.08100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-convolution-augmented-transformer-for-speech-recognition-2005.08100"/></url>
<url><loc>https://scifaro.com/en/abs/that-sounds-familiar-an-analysis-of-phonetic-representations-transfer-across-languages-2005.08118</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/that-sounds-familiar-an-analysis-of-phonetic-representations-transfer-across-languages-2005.08118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/that-sounds-familiar-an-analysis-of-phonetic-representations-transfer-across-languages-2005.08118"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-mixture-of-local-experts-for-efficient-speech-enhancement-2005.08128</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-mixture-of-local-experts-for-efficient-speech-enhancement-2005.08128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-mixture-of-local-experts-for-efficient-speech-enhancement-2005.08128"/></url>
<url><loc>https://scifaro.com/en/abs/an-open-source-implementation-of-itu-t-recommendation-p-808-with-validation-2005.08138</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-open-source-implementation-of-itu-t-recommendation-p-808-with-validation-2005.08138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-open-source-implementation-of-itu-t-recommendation-p-808-with-validation-2005.08138"/></url>
<url><loc>https://scifaro.com/en/abs/identification-segmentation-of-indian-regional-languages-with-singular-value-decomposition-based-feature-embedding-2005.08229</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-segmentation-of-indian-regional-languages-with-singular-value-decomposition-based-feature-embedding-2005.08229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-segmentation-of-indian-regional-languages-with-singular-value-decomposition-based-feature-embedding-2005.08229"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-far-field-feature-enhancement-for-speaker-verification-in-the-wild-2005.08331</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-far-field-feature-enhancement-for-speaker-verification-in-the-wild-2005.08331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-far-field-feature-enhancement-for-speaker-verification-in-the-wild-2005.08331"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-target-speech-separation-with-voice-and-face-references-2005.08335</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-target-speech-separation-with-voice-and-face-references-2005.08335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-target-speech-separation-with-voice-and-face-references-2005.08335"/></url>
<url><loc>https://scifaro.com/en/abs/wake-word-detection-with-alignment-free-lattice-free-mmi-2005.08347</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wake-word-detection-with-alignment-free-lattice-free-mmi-2005.08347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wake-word-detection-with-alignment-free-lattice-free-mmi-2005.08347"/></url>
<url><loc>https://scifaro.com/en/abs/north-atlantic-right-whales-up-call-detection-using-multimodel-deep-learning-2005.08356</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/north-atlantic-right-whales-up-call-detection-using-multimodel-deep-learning-2005.08356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/north-atlantic-right-whales-up-call-detection-using-multimodel-deep-learning-2005.08356"/></url>
<url><loc>https://scifaro.com/en/abs/vector-quantized-autoregressive-predictive-coding-2005.08392</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-quantized-autoregressive-predictive-coding-2005.08392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-quantized-autoregressive-predictive-coding-2005.08392"/></url>
<url><loc>https://scifaro.com/en/abs/the-ntnu-system-at-the-interspeech-2020-non-native-children-s-speech-asr-challenge-2005.08433</loc><lastmod>2020-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ntnu-system-at-the-interspeech-2020-non-native-children-s-speech-asr-challenge-2005.08433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ntnu-system-at-the-interspeech-2020-non-native-children-s-speech-asr-challenge-2005.08433"/></url>
<url><loc>https://scifaro.com/en/abs/an-effective-end-to-end-modeling-approach-for-mispronunciation-detection-2005.08440</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-effective-end-to-end-modeling-approach-for-mispronunciation-detection-2005.08440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-effective-end-to-end-modeling-approach-for-mispronunciation-detection-2005.08440"/></url>
<url><loc>https://scifaro.com/en/abs/many-to-many-voice-transformer-network-2005.08445</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-to-many-voice-transformer-network-2005.08445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-to-many-voice-transformer-network-2005.08445"/></url>
<url><loc>https://scifaro.com/en/abs/attentron-few-shot-text-to-speech-utilizing-attention-based-variable-length-embedding-2005.08484</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentron-few-shot-text-to-speech-utilizing-attention-based-variable-length-embedding-2005.08484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentron-few-shot-text-to-speech-utilizing-attention-based-variable-length-embedding-2005.08484"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-transducer-for-online-speech-recognition-2005.08497</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-transducer-for-online-speech-recognition-2005.08497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-transducer-for-online-speech-recognition-2005.08497"/></url>
<url><loc>https://scifaro.com/en/abs/unconditional-audio-generation-with-generative-adversarial-networks-and-cycle-regularization-2005.08526</loc><lastmod>2021-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unconditional-audio-generation-with-generative-adversarial-networks-and-cycle-regularization-2005.08526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unconditional-audio-generation-with-generative-adversarial-networks-and-cycle-regularization-2005.08526"/></url>
<url><loc>https://scifaro.com/en/abs/moboaligner-a-neural-alignment-model-for-non-autoregressive-tts-with-monotonic-boundary-search-2005.08528</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moboaligner-a-neural-alignment-model-for-non-autoregressive-tts-with-monotonic-boundary-search-2005.08528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moboaligner-a-neural-alignment-model-for-non-autoregressive-tts-with-monotonic-boundary-search-2005.08528"/></url>
<url><loc>https://scifaro.com/en/abs/quaternion-neural-networks-for-multi-channel-distant-speech-recognition-2005.08566</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quaternion-neural-networks-for-multi-channel-distant-speech-recognition-2005.08566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quaternion-neural-networks-for-multi-channel-distant-speech-recognition-2005.08566"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-multi-channel-recognition-of-overlapped-speech-2005.08571</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-multi-channel-recognition-of-overlapped-speech-2005.08571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-multi-channel-recognition-of-overlapped-speech-2005.08571"/></url>
<url><loc>https://scifaro.com/en/abs/audio-albert-a-lite-bert-for-self-supervised-learning-of-audio-representation-2005.08575</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-albert-a-lite-bert-for-self-supervised-learning-of-audio-representation-2005.08575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-albert-a-lite-bert-for-self-supervised-learning-of-audio-representation-2005.08575"/></url>
<url><loc>https://scifaro.com/en/abs/design-choices-for-x-vector-based-speaker-anonymization-2005.08601</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-choices-for-x-vector-based-speaker-anonymization-2005.08601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-choices-for-x-vector-based-speaker-anonymization-2005.08601"/></url>
<url><loc>https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-vocoder-a-non-autoregressive-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-2005.08654</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-vocoder-a-non-autoregressive-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-2005.08654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-vocoder-a-non-autoregressive-pitch-dependent-dilated-convolution-model-for-parametric-speech-generation-2005.08654"/></url>
<url><loc>https://scifaro.com/en/abs/a-cyclical-post-filtering-approach-to-mismatch-refinement-of-neural-vocoder-for-text-to-speech-systems-2005.08659</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cyclical-post-filtering-approach-to-mismatch-refinement-of-neural-vocoder-for-text-to-speech-systems-2005.08659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cyclical-post-filtering-approach-to-mismatch-refinement-of-neural-vocoder-for-text-to-speech-systems-2005.08659"/></url>
<url><loc>https://scifaro.com/en/abs/mask-ctc-non-autoregressive-end-to-end-asr-with-ctc-and-mask-predict-2005.08700</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-ctc-non-autoregressive-end-to-end-asr-with-ctc-and-mask-predict-2005.08700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-ctc-non-autoregressive-end-to-end-asr-with-ctc-and-mask-predict-2005.08700"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-fusion-of-attention-and-sequence-to-sequence-autoencoders-to-predict-sleepiness-from-speech-2005.08722</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-fusion-of-attention-and-sequence-to-sequence-autoencoders-to-predict-sleepiness-from-speech-2005.08722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-fusion-of-attention-and-sequence-to-sequence-autoencoders-to-predict-sleepiness-from-speech-2005.08722"/></url>
<url><loc>https://scifaro.com/en/abs/approaches-to-improving-recognition-of-underrepresented-named-entities-in-hybrid-asr-systems-2005.08742</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approaches-to-improving-recognition-of-underrepresented-named-entities-in-hybrid-asr-systems-2005.08742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approaches-to-improving-recognition-of-underrepresented-named-entities-in-hybrid-asr-systems-2005.08742"/></url>
<url><loc>https://scifaro.com/en/abs/metric-learning-for-keyword-spotting-2005.08776</loc><lastmod>2020-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-learning-for-keyword-spotting-2005.08776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-learning-for-keyword-spotting-2005.08776"/></url>
<url><loc>https://scifaro.com/en/abs/defending-your-voice-adversarial-attack-on-voice-conversion-2005.08781</loc><lastmod>2021-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defending-your-voice-adversarial-attack-on-voice-conversion-2005.08781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defending-your-voice-adversarial-attack-on-voice-conversion-2005.08781"/></url>
<url><loc>https://scifaro.com/en/abs/weak-attention-suppression-for-transformer-based-speech-recognition-2005.09137</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weak-attention-suppression-for-transformer-based-speech-recognition-2005.09137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weak-attention-suppression-for-transformer-based-speech-recognition-2005.09137"/></url>
<url><loc>https://scifaro.com/en/abs/faster-simpler-and-more-accurate-hybrid-asr-systems-using-wordpieces-2005.09150</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/faster-simpler-and-more-accurate-hybrid-asr-systems-using-wordpieces-2005.09150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/faster-simpler-and-more-accurate-hybrid-asr-systems-using-wordpieces-2005.09150"/></url>
<url><loc>https://scifaro.com/en/abs/transferring-source-style-in-non-parallel-voice-conversion-2005.09178</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferring-source-style-in-non-parallel-voice-conversion-2005.09178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferring-source-style-in-non-parallel-voice-conversion-2005.09178"/></url>
<url><loc>https://scifaro.com/en/abs/atss-net-target-speaker-separation-via-attention-based-neural-network-2005.09200</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atss-net-target-speaker-separation-via-attention-based-neural-network-2005.09200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atss-net-target-speaker-separation-via-attention-based-neural-network-2005.09200"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-based-on-interpolation-deep-neural-network-2005.09234</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-interpolation-deep-neural-network-2005.09234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-interpolation-deep-neural-network-2005.09234"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-training-data-adaptation-for-very-low-resource-automatic-speech-recognition-2005.09256</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-training-data-adaptation-for-very-low-resource-automatic-speech-recognition-2005.09256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-training-data-adaptation-for-very-low-resource-automatic-speech-recognition-2005.09256"/></url>
<url><loc>https://scifaro.com/en/abs/robust-beam-search-for-encoder-decoder-attention-based-speech-recognition-without-length-bias-2005.09265</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-beam-search-for-encoder-decoder-attention-based-speech-recognition-without-length-bias-2005.09265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-beam-search-for-encoder-decoder-attention-based-speech-recognition-without-length-bias-2005.09265"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-subspace-hmm-for-the-zerospeech-2020-challenge-2005.09282</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-subspace-hmm-for-the-zerospeech-2020-challenge-2005.09282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-subspace-hmm-for-the-zerospeech-2020-challenge-2005.09282"/></url>
<url><loc>https://scifaro.com/en/abs/should-we-hard-code-the-recurrence-concept-or-learn-it-instead-exploring-the-transformer-architecture-for-audio-visual-speech-recognition-2005.09297</loc><lastmod>2020-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/should-we-hard-code-the-recurrence-concept-or-learn-it-instead-exploring-the-transformer-architecture-for-audio-visual-speech-recognition-2005.09297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/should-we-hard-code-the-recurrence-concept-or-learn-it-instead-exploring-the-transformer-architecture-for-audio-visual-speech-recognition-2005.09297"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-training-pipeline-for-an-improved-neural-transducer-2005.09319</loc><lastmod>2020-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-training-pipeline-for-an-improved-neural-transducer-2005.09319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-training-pipeline-for-an-improved-neural-transducer-2005.09319"/></url>
<url><loc>https://scifaro.com/en/abs/a-systematic-comparison-of-grapheme-based-vs-phoneme-based-label-units-for-encoder-decoder-attention-models-2005.09336</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-systematic-comparison-of-grapheme-based-vs-phoneme-based-label-units-for-encoder-decoder-attention-models-2005.09336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-systematic-comparison-of-grapheme-based-vs-phoneme-based-label-units-for-encoder-decoder-attention-models-2005.09336"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-monotonic-multihead-attention-for-streaming-asr-2005.09394</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-monotonic-multihead-attention-for-streaming-asr-2005.09394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-monotonic-multihead-attention-for-streaming-asr-2005.09394"/></url>
<url><loc>https://scifaro.com/en/abs/vector-quantized-neural-networks-for-acoustic-unit-discovery-in-the-zerospeech-2020-challenge-2005.09409</loc><lastmod>2020-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-quantized-neural-networks-for-acoustic-unit-discovery-in-the-zerospeech-2020-challenge-2005.09409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-quantized-neural-networks-for-acoustic-unit-discovery-in-the-zerospeech-2020-challenge-2005.09409"/></url>
<url><loc>https://scifaro.com/en/abs/learning-joint-articulatory-acoustic-representations-with-normalizing-flows-2005.09463</loc><lastmod>2020-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-joint-articulatory-acoustic-representations-with-normalizing-flows-2005.09463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-joint-articulatory-acoustic-representations-with-normalizing-flows-2005.09463"/></url>
<url><loc>https://scifaro.com/en/abs/gev-beamforming-supported-by-doa-based-masks-generated-on-pairs-of-microphones-2005.09587</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gev-beamforming-supported-by-doa-based-masks-generated-on-pairs-of-microphones-2005.09587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gev-beamforming-supported-by-doa-based-masks-generated-on-pairs-of-microphones-2005.09587"/></url>
<url><loc>https://scifaro.com/en/abs/improved-noisy-student-training-for-automatic-speech-recognition-2005.09629</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-noisy-student-training-for-automatic-speech-recognition-2005.09629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-noisy-student-training-for-automatic-speech-recognition-2005.09629"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-transformers-for-large-scale-speech-recognition-2005.09684</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-transformers-for-large-scale-speech-recognition-2005.09684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-transformers-for-large-scale-speech-recognition-2005.09684"/></url>
<url><loc>https://scifaro.com/en/abs/improving-proper-noun-recognition-in-end-to-end-asr-by-customization-of-the-mwer-loss-criterion-2005.09756</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-proper-noun-recognition-in-end-to-end-asr-by-customization-of-the-mwer-loss-criterion-2005.09756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-proper-noun-recognition-in-end-to-end-asr-by-customization-of-the-mwer-loss-criterion-2005.09756"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-similarity-between-piano-notes-simulations-with-a-template-based-perception-model-2005.09768</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-similarity-between-piano-notes-simulations-with-a-template-based-perception-model-2005.09768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-similarity-between-piano-notes-simulations-with-a-template-based-perception-model-2005.09768"/></url>
<url><loc>https://scifaro.com/en/abs/pychain-a-fully-parallelized-pytorch-implementation-of-lf-mmi-for-end-to-end-asr-2005.09824</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pychain-a-fully-parallelized-pytorch-implementation-of-lf-mmi-for-end-to-end-asr-2005.09824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pychain-a-fully-parallelized-pytorch-implementation-of-lf-mmi-for-end-to-end-asr-2005.09824"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-optimal-denoising-dereverberation-and-source-separation-2005.09843</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-optimal-denoising-dereverberation-and-source-separation-2005.09843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-optimal-denoising-dereverberation-and-source-separation-2005.09843"/></url>
<url><loc>https://scifaro.com/en/abs/a-further-study-of-unsupervised-pre-training-for-transformer-based-speech-recognition-2005.09862</loc><lastmod>2020-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-further-study-of-unsupervised-pre-training-for-transformer-based-speech-recognition-2005.09862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-further-study-of-unsupervised-pre-training-for-transformer-based-speech-recognition-2005.09862"/></url>
<url><loc>https://scifaro.com/en/abs/consistent-ica-determined-bss-meets-spectrogram-consistency-2005.09873</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistent-ica-determined-bss-meets-spectrogram-consistency-2005.09873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistent-ica-determined-bss-meets-spectrogram-consistency-2005.09873"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-and-neural-network-based-speech-activity-detection-in-non-stationary-acoustic-environments-2005.09913</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-and-neural-network-based-speech-activity-detection-in-non-stationary-acoustic-environments-2005.09913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-and-neural-network-based-speech-activity-detection-in-non-stationary-acoustic-environments-2005.09913"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-diarization-for-an-unknown-number-of-speakers-with-encoder-decoder-based-attractors-2005.09921</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-for-an-unknown-number-of-speakers-with-encoder-decoder-based-attractors-2005.09921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-for-an-unknown-number-of-speakers-with-encoder-decoder-based-attractors-2005.09921"/></url>
<url><loc>https://scifaro.com/en/abs/relative-positional-encoding-for-speech-recognition-and-direct-translation-2005.09940</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relative-positional-encoding-for-speech-recognition-and-direct-translation-2005.09940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relative-positional-encoding-for-speech-recognition-and-direct-translation-2005.09940"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-features-and-metrics-for-high-quality-simulation-of-early-vocal-learning-of-vowels-2005.09986</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-features-and-metrics-for-high-quality-simulation-of-early-vocal-learning-of-vowels-2005.09986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-features-and-metrics-for-high-quality-simulation-of-early-vocal-learning-of-vowels-2005.09986"/></url>
<url><loc>https://scifaro.com/en/abs/early-stage-lm-integration-using-local-and-global-log-linear-combination-2005.10049</loc><lastmod>2020-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/early-stage-lm-integration-using-local-and-global-log-linear-combination-2005.10049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/early-stage-lm-integration-using-local-and-global-log-linear-combination-2005.10049"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-large-margin-softmax-in-neural-language-modeling-2005.10089</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-large-margin-softmax-in-neural-language-modeling-2005.10089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-large-margin-softmax-in-neural-language-modeling-2005.10089"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-label-synchronous-and-frame-synchronous-end-to-end-models-for-speech-recognition-2005.10113</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-label-synchronous-and-frame-synchronous-end-to-end-models-for-speech-recognition-2005.10113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-label-synchronous-and-frame-synchronous-end-to-end-models-for-speech-recognition-2005.10113"/></url>
<url><loc>https://scifaro.com/en/abs/towards-cover-song-detection-with-siamese-convolutional-neural-networks-2005.10294</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-cover-song-detection-with-siamese-convolutional-neural-networks-2005.10294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-cover-song-detection-with-siamese-convolutional-neural-networks-2005.10294"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-look-keyword-spotting-2005.10386</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-look-keyword-spotting-2005.10386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-look-keyword-spotting-2005.10386"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-learning-abilities-on-linguistic-features-in-sequence-to-sequence-text-to-speech-synthesis-2005.10390</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-learning-abilities-on-linguistic-features-in-sequence-to-sequence-text-to-speech-synthesis-2005.10390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-learning-abilities-on-linguistic-features-in-sequence-to-sequence-text-to-speech-synthesis-2005.10390"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-attack-detection-using-the-non-linear-fusion-of-sub-band-classifiers-2005.10393</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-attack-detection-using-the-non-linear-fusion-of-sub-band-classifiers-2005.10393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-attack-detection-using-the-non-linear-fusion-of-sub-band-classifiers-2005.10393"/></url>
<url><loc>https://scifaro.com/en/abs/training-keyword-spotting-models-on-non-iid-data-with-federated-learning-2005.10406</loc><lastmod>2020-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-keyword-spotting-models-on-non-iid-data-with-federated-learning-2005.10406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-keyword-spotting-models-on-non-iid-data-with-federated-learning-2005.10406"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-text-data-using-hybrid-transformer-lstm-based-end-to-end-asr-in-transfer-learning-2005.10407</loc><lastmod>2020-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-text-data-using-hybrid-transformer-lstm-based-end-to-end-asr-in-transfer-learning-2005.10407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-text-data-using-hybrid-transformer-lstm-based-end-to-end-asr-in-transfer-learning-2005.10407"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-multispeaker-text-to-speech-under-limited-data-scenario-2005.10441</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-multispeaker-text-to-speech-under-limited-data-scenario-2005.10441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-multispeaker-text-to-speech-under-limited-data-scenario-2005.10441"/></url>
<url><loc>https://scifaro.com/en/abs/pitchtron-towards-audiobook-generation-from-ordinary-people-s-voices-2005.10456</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitchtron-towards-audiobook-generation-from-ordinary-people-s-voices-2005.10456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitchtron-towards-audiobook-generation-from-ordinary-people-s-voices-2005.10456"/></url>
<url><loc>https://scifaro.com/en/abs/asapp-asr-multistream-cnn-and-self-attentive-sru-for-sota-speech-recognition-2005.10469</loc><lastmod>2020-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asapp-asr-multistream-cnn-and-self-attentive-sru-for-sota-speech-recognition-2005.10469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asapp-asr-multistream-cnn-and-self-attentive-sru-for-sota-speech-recognition-2005.10469"/></url>
<url><loc>https://scifaro.com/en/abs/multistream-cnn-for-robust-acoustic-modeling-2005.10470</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multistream-cnn-for-robust-acoustic-modeling-2005.10470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multistream-cnn-for-robust-acoustic-modeling-2005.10470"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-far-field-speech-recognition-with-unified-dereverberation-and-beamforming-2005.10479</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-far-field-speech-recognition-with-unified-dereverberation-and-beamforming-2005.10479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-far-field-speech-recognition-with-unified-dereverberation-and-beamforming-2005.10479"/></url>
<url><loc>https://scifaro.com/en/abs/coswara-a-database-of-breathing-cough-and-voice-sounds-for-covid-19-diagnosis-2005.10548</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coswara-a-database-of-breathing-cough-and-voice-sounds-for-covid-19-diagnosis-2005.10548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coswara-a-database-of-breathing-cough-and-voice-sounds-for-covid-19-diagnosis-2005.10548"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-sparsity-neural-networks-for-automatic-speech-recognition-2005.10627</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-sparsity-neural-networks-for-automatic-speech-recognition-2005.10627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-sparsity-neural-networks-for-automatic-speech-recognition-2005.10627"/></url>
<url><loc>https://scifaro.com/en/abs/formant-tracking-using-dilated-convolutional-networks-through-dense-connection-with-gating-mechanism-2005.10803</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/formant-tracking-using-dilated-convolutional-networks-through-dense-connection-with-gating-mechanism-2005.10803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/formant-tracking-using-dilated-convolutional-networks-through-dense-connection-with-gating-mechanism-2005.10803"/></url>
<url><loc>https://scifaro.com/en/abs/nautilus-a-versatile-voice-cloning-system-2005.11004</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nautilus-a-versatile-voice-cloning-system-2005.11004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nautilus-a-versatile-voice-cloning-system-2005.11004"/></url>
<url><loc>https://scifaro.com/en/abs/glow-tts-a-generative-flow-for-text-to-speech-via-monotonic-alignment-search-2005.11129</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glow-tts-a-generative-flow-for-text-to-speech-via-monotonic-alignment-search-2005.11129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glow-tts-a-generative-flow-for-text-to-speech-via-monotonic-alignment-search-2005.11129"/></url>
<url><loc>https://scifaro.com/en/abs/tinylstms-efficient-neural-speech-enhancement-for-hearing-aids-2005.11138</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tinylstms-efficient-neural-speech-enhancement-for-hearing-aids-2005.11138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tinylstms-efficient-neural-speech-enhancement-for-hearing-aids-2005.11138"/></url>
<url><loc>https://scifaro.com/en/abs/deep-reinforcement-learning-with-pre-training-for-time-efficient-training-of-automatic-speech-recognition-2005.11172</loc><lastmod>2020-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-reinforcement-learning-with-pre-training-for-time-efficient-training-of-automatic-speech-recognition-2005.11172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-reinforcement-learning-with-pre-training-for-time-efficient-training-of-automatic-speech-recognition-2005.11172"/></url>
<url><loc>https://scifaro.com/en/abs/leap-submission-to-chime-6-asr-challenge-2005.11258</loc><lastmod>2020-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leap-submission-to-chime-6-asr-challenge-2005.11258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leap-submission-to-chime-6-asr-challenge-2005.11258"/></url>
<url><loc>https://scifaro.com/en/abs/librimix-an-open-source-dataset-for-generalizable-speech-separation-2005.11262</loc><lastmod>2020-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/librimix-an-open-source-dataset-for-generalizable-speech-separation-2005.11262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/librimix-an-open-source-dataset-for-generalizable-speech-separation-2005.11262"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-array-based-surveillance-audio-classification-2005.11348</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-array-based-surveillance-audio-classification-2005.11348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-array-based-surveillance-audio-classification-2005.11348"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-with-session-level-speaker-embedding-refinement-using-graph-neural-networks-2005.11371</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-with-session-level-speaker-embedding-refinement-using-graph-neural-networks-2005.11371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-with-session-level-speaker-embedding-refinement-using-graph-neural-networks-2005.11371"/></url>
<url><loc>https://scifaro.com/en/abs/identify-speakers-in-cocktail-parties-with-end-to-end-attention-2005.11408</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identify-speakers-in-cocktail-parties-with-end-to-end-attention-2005.11408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identify-speakers-in-cocktail-parties-with-end-to-end-attention-2005.11408"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-best-loss-function-for-dnn-based-low-latency-speech-enhancement-with-temporal-convolutional-networks-2005.11611</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-best-loss-function-for-dnn-based-low-latency-speech-enhancement-with-temporal-convolutional-networks-2005.11611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-best-loss-function-for-dnn-based-low-latency-speech-enhancement-with-temporal-convolutional-networks-2005.11611"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-integration-of-multi-channel-information-for-speaker-independent-speech-separation-2005.11612</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-integration-of-multi-channel-information-for-speaker-independent-speech-separation-2005.11612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-integration-of-multi-channel-information-for-speaker-independent-speech-separation-2005.11612"/></url>
<url><loc>https://scifaro.com/en/abs/glottal-source-estimation-robustness-a-comparison-of-sensitivity-of-voice-source-estimation-techniques-2005.11682</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glottal-source-estimation-robustness-a-comparison-of-sensitivity-of-voice-source-estimation-techniques-2005.11682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glottal-source-estimation-robustness-a-comparison-of-sensitivity-of-voice-source-estimation-techniques-2005.11682"/></url>
<url><loc>https://scifaro.com/en/abs/mimo-speech-compression-and-enhancement-based-on-convolutional-denoising-autoencoder-2005.11704</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimo-speech-compression-and-enhancement-based-on-convolutional-denoising-autoencoder-2005.11704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimo-speech-compression-and-enhancement-based-on-convolutional-denoising-autoencoder-2005.11704"/></url>
<url><loc>https://scifaro.com/en/abs/seril-noise-adaptive-speech-enhancement-using-regularization-based-incremental-learning-2005.11760</loc><lastmod>2020-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seril-noise-adaptive-speech-enhancement-using-regularization-based-incremental-learning-2005.11760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seril-noise-adaptive-speech-enhancement-using-regularization-based-incremental-learning-2005.11760"/></url>
<url><loc>https://scifaro.com/en/abs/lite-audio-visual-speech-enhancement-2005.11769</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lite-audio-visual-speech-enhancement-2005.11769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lite-audio-visual-speech-enhancement-2005.11769"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-word-embedding-system-for-code-switching-query-by-example-spoken-term-detection-2005.11777</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-word-embedding-system-for-code-switching-query-by-example-spoken-term-detection-2005.11777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-word-embedding-system-for-code-switching-query-by-example-spoken-term-detection-2005.11777"/></url>
<url><loc>https://scifaro.com/en/abs/domain-invariant-speaker-vector-projection-by-model-agnostic-meta-learning-2005.11900</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-invariant-speaker-vector-projection-by-model-agnostic-meta-learning-2005.11900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-invariant-speaker-vector-projection-by-model-agnostic-meta-learning-2005.11900"/></url>
<url><loc>https://scifaro.com/en/abs/asr-free-pronunciation-assessment-2005.11902</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-free-pronunciation-assessment-2005.11902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-free-pronunciation-assessment-2005.11902"/></url>
<url><loc>https://scifaro.com/en/abs/neural-discriminant-analysis-for-deep-speaker-embedding-2005.11905</loc><lastmod>2020-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-discriminant-analysis-for-deep-speaker-embedding-2005.11905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-discriminant-analysis-for-deep-speaker-embedding-2005.11905"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-mispronunciation-detection-system-for-l2-english-speech-leveraging-novel-anti-phone-modeling-2005.11950</loc><lastmod>2020-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-mispronunciation-detection-system-for-l2-english-speech-leveraging-novel-anti-phone-modeling-2005.11950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-mispronunciation-detection-system-for-l2-english-speech-leveraging-novel-anti-phone-modeling-2005.11950"/></url>
<url><loc>https://scifaro.com/en/abs/masked-pre-trained-encoder-base-on-joint-ctc-transformer-2005.11978</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-pre-trained-encoder-base-on-joint-ctc-transformer-2005.11978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-pre-trained-encoder-base-on-joint-ctc-transformer-2005.11978"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-tts-for-low-resource-speakers-using-pre-trained-model-and-speech-enhancement-2005.12531</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-tts-for-low-resource-speakers-using-pre-trained-model-and-speech-enhancement-2005.12531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-tts-for-low-resource-speakers-using-pre-trained-model-and-speech-enhancement-2005.12531"/></url>
<url><loc>https://scifaro.com/en/abs/multi-staged-cross-lingual-acoustic-model-adaption-for-robust-speech-recognition-in-real-world-applications-a-case-study-on-german-oral-history-interviews-2005.12562</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-staged-cross-lingual-acoustic-model-adaption-for-robust-speech-recognition-in-real-world-applications-a-case-study-on-german-oral-history-interviews-2005.12562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-staged-cross-lingual-acoustic-model-adaption-for-robust-speech-recognition-in-real-world-applications-a-case-study-on-german-oral-history-interviews-2005.12562"/></url>
<url><loc>https://scifaro.com/en/abs/an-open-source-voice-type-classifier-for-child-centered-daylong-recordings-2005.12656</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-open-source-voice-type-classifier-for-child-centered-daylong-recordings-2005.12656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-open-source-voice-type-classifier-for-child-centered-daylong-recordings-2005.12656"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-optimal-dnn-architecture-for-end-to-end-beamformers-based-on-time-frequency-references-2005.12683</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-optimal-dnn-architecture-for-end-to-end-beamformers-based-on-time-frequency-references-2005.12683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-optimal-dnn-architecture-for-end-to-end-beamformers-based-on-time-frequency-references-2005.12683"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-vietnamese-statistical-parametric-speech-synthesis-systems-2005.12962</loc><lastmod>2020-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-vietnamese-statistical-parametric-speech-synthesis-systems-2005.12962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-vietnamese-statistical-parametric-speech-synthesis-systems-2005.12962"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-predictive-coding-supported-factorized-variational-autoencoder-for-unsupervised-learning-of-disentangled-speech-representations-2005.12963</loc><lastmod>2021-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-predictive-coding-supported-factorized-variational-autoencoder-for-unsupervised-learning-of-disentangled-speech-representations-2005.12963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-predictive-coding-supported-factorized-variational-autoencoder-for-unsupervised-learning-of-disentangled-speech-representations-2005.12963"/></url>
<url><loc>https://scifaro.com/en/abs/acgan-based-data-augmentation-integrated-with-long-term-scalogram-for-acoustic-scene-classification-2005.13146</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acgan-based-data-augmentation-integrated-with-long-term-scalogram-for-acoustic-scene-classification-2005.13146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acgan-based-data-augmentation-integrated-with-long-term-scalogram-for-acoustic-scene-classification-2005.13146"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-source-localization-with-deep-generative-modeling-2005.13163</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-source-localization-with-deep-generative-modeling-2005.13163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-source-localization-with-deep-generative-modeling-2005.13163"/></url>
<url><loc>https://scifaro.com/en/abs/insertion-based-modeling-for-end-to-end-automatic-speech-recognition-2005.13211</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/insertion-based-modeling-for-end-to-end-automatic-speech-recognition-2005.13211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/insertion-based-modeling-for-end-to-end-automatic-speech-recognition-2005.13211"/></url>
<url><loc>https://scifaro.com/en/abs/deep-sensory-substitution-noninvasively-enabling-biological-neural-networks-to-receive-input-from-artificial-neural-networks-2005.13291</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-sensory-substitution-noninvasively-enabling-biological-neural-networks-to-receive-input-from-artificial-neural-networks-2005.13291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-sensory-substitution-noninvasively-enabling-biological-neural-networks-to-receive-input-from-artificial-neural-networks-2005.13291"/></url>
<url><loc>https://scifaro.com/en/abs/cat-a-ctc-crf-based-asr-toolkit-bridging-the-hybrid-and-the-end-to-end-approaches-towards-data-efficiency-and-low-latency-2005.13326</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cat-a-ctc-crf-based-asr-toolkit-bridging-the-hybrid-and-the-end-to-end-approaches-towards-data-efficiency-and-low-latency-2005.13326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cat-a-ctc-crf-based-asr-toolkit-bridging-the-hybrid-and-the-end-to-end-approaches-towards-data-efficiency-and-low-latency-2005.13326"/></url>
<url><loc>https://scifaro.com/en/abs/modality-dropout-for-improved-performance-driven-talking-faces-2005.13616</loc><lastmod>2020-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modality-dropout-for-improved-performance-driven-talking-faces-2005.13616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modality-dropout-for-improved-performance-driven-talking-faces-2005.13616"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-audio-source-separation-using-generative-priors-2005.13769</loc><lastmod>2020-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-audio-source-separation-using-generative-priors-2005.13769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-audio-source-separation-using-generative-priors-2005.13769"/></url>
<url><loc>https://scifaro.com/en/abs/deepsonar-towards-effective-and-robust-detection-of-ai-synthesized-fake-voices-2005.13770</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepsonar-towards-effective-and-robust-detection-of-ai-synthesized-fake-voices-2005.13770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepsonar-towards-effective-and-robust-detection-of-ai-synthesized-fake-voices-2005.13770"/></url>
<url><loc>https://scifaro.com/en/abs/speech-to-singing-conversion-based-on-boundary-equilibrium-gan-2005.13835</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-to-singing-conversion-based-on-boundary-equilibrium-gan-2005.13835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-to-singing-conversion-based-on-boundary-equilibrium-gan-2005.13835"/></url>
<url><loc>https://scifaro.com/en/abs/when-can-self-attention-be-replaced-by-feed-forward-layers-2005.13895</loc><lastmod>2020-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/when-can-self-attention-be-replaced-by-feed-forward-layers-2005.13895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/when-can-self-attention-be-replaced-by-feed-forward-layers-2005.13895"/></url>
<url><loc>https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-testing-framework-and-challenge-results-2005.13981</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-testing-framework-and-challenge-results-2005.13981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interspeech-2020-deep-noise-suppression-challenge-datasets-subjective-testing-framework-and-challenge-results-2005.13981"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-restoration-of-audio-degraded-by-low-frequency-pulses-modeled-via-gaussian-process-2005.14181</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-restoration-of-audio-degraded-by-low-frequency-pulses-modeled-via-gaussian-process-2005.14181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-restoration-of-audio-degraded-by-low-frequency-pulses-modeled-via-gaussian-process-2005.14181"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-machine-learning-models-for-tabular-data-through-challenge-of-monitoring-parkinson-s-disease-progression-using-voice-recordings-2005.14257</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-machine-learning-models-for-tabular-data-through-challenge-of-monitoring-parkinson-s-disease-progression-using-voice-recordings-2005.14257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-machine-learning-models-for-tabular-data-through-challenge-of-monitoring-parkinson-s-disease-progression-using-voice-recordings-2005.14257"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-comparison-of-popular-end-to-end-models-for-large-scale-speech-recognition-2005.14327</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-comparison-of-popular-end-to-end-models-for-large-scale-speech-recognition-2005.14327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-comparison-of-popular-end-to-end-models-for-large-scale-speech-recognition-2005.14327"/></url>
<url><loc>https://scifaro.com/en/abs/sub-band-knowledge-distillation-framework-for-speech-enhancement-2005.14435</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-band-knowledge-distillation-framework-for-speech-enhancement-2005.14435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-band-knowledge-distillation-framework-for-speech-enhancement-2005.14435"/></url>
<url><loc>https://scifaro.com/en/abs/snr-based-teachers-student-technique-for-speech-enhancement-2005.14441</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snr-based-teachers-student-technique-for-speech-enhancement-2005.14441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snr-based-teachers-student-technique-for-speech-enhancement-2005.14441"/></url>
<url><loc>https://scifaro.com/en/abs/improving-unsupervised-sparsespeech-acoustic-models-with-categorical-reparameterization-2005.14578</loc><lastmod>2020-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-unsupervised-sparsespeech-acoustic-models-with-categorical-reparameterization-2005.14578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-unsupervised-sparsespeech-acoustic-models-with-categorical-reparameterization-2005.14578"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-adversarial-examples-for-speech-recognition-via-uncertainty-quantification-2005.14611</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-adversarial-examples-for-speech-recognition-via-uncertainty-quantification-2005.14611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-adversarial-examples-for-speech-recognition-via-uncertainty-quantification-2005.14611"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-classification-in-dcase-2020-challenge-generalization-across-devices-and-low-complexity-solutions-2005.14623</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-classification-in-dcase-2020-challenge-generalization-across-devices-and-low-complexity-solutions-2005.14623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-classification-in-dcase-2020-challenge-generalization-across-devices-and-low-complexity-solutions-2005.14623"/></url>
<url><loc>https://scifaro.com/en/abs/the-inesc-id-multi-modal-system-for-the-adress-2020-challenge-2005.14646</loc><lastmod>2020-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-inesc-id-multi-modal-system-for-the-adress-2020-challenge-2005.14646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-inesc-id-multi-modal-system-for-the-adress-2020-challenge-2005.14646"/></url>
<url><loc>https://scifaro.com/en/abs/assessment-of-parkinson-s-disease-medication-state-through-automatic-speech-analysis-2005.14647</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessment-of-parkinson-s-disease-medication-state-through-automatic-speech-analysis-2005.14647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessment-of-parkinson-s-disease-medication-state-through-automatic-speech-analysis-2005.14647"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-filterbank-learning-for-keyword-spotting-2006.00217</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-filterbank-learning-for-keyword-spotting-2006.00217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-filterbank-learning-for-keyword-spotting-2006.00217"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-latent-timbre-synthesis-2006.00408</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-latent-timbre-synthesis-2006.00408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-latent-timbre-synthesis-2006.00408"/></url>
<url><loc>https://scifaro.com/en/abs/crossed-time-delay-neural-network-for-speaker-recognition-2006.00452</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossed-time-delay-neural-network-for-speaker-recognition-2006.00452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossed-time-delay-neural-network-for-speaker-recognition-2006.00452"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-detection-and-analysis-of-the-patterns-of-creaky-voice-2006.00518</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-detection-and-analysis-of-the-patterns-of-creaky-voice-2006.00518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-detection-and-analysis-of-the-patterns-of-creaky-voice-2006.00518"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-voiced-frequency-estimation-exploiting-amplitude-and-phase-spectra-2006.00521</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-voiced-frequency-estimation-exploiting-amplitude-and-phase-spectra-2006.00521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-voiced-frequency-estimation-exploiting-amplitude-and-phase-spectra-2006.00521"/></url>
<url><loc>https://scifaro.com/en/abs/residual-excitation-skewness-for-automatic-speech-polarity-detection-2006.00525</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-excitation-skewness-for-automatic-speech-polarity-detection-2006.00525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-excitation-skewness-for-automatic-speech-polarity-detection-2006.00525"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-single-stage-speech-denoising-and-dereverberation-with-u-net-2006.00687</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-single-stage-speech-denoising-and-dereverberation-with-u-net-2006.00687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-single-stage-speech-denoising-and-dereverberation-with-u-net-2006.00687"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-language-identification-using-combination-of-acoustic-representations-and-asr-hypotheses-2006.00703</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-language-identification-using-combination-of-acoustic-representations-and-asr-hypotheses-2006.00703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-language-identification-using-combination-of-acoustic-representations-and-asr-hypotheses-2006.00703"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-cnn-based-automatic-music-tagging-models-2006.00751</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-cnn-based-automatic-music-tagging-models-2006.00751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-cnn-based-automatic-music-tagging-models-2006.00751"/></url>
<url><loc>https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-method-for-target-source-extraction-using-magnitude-spectrogram-as-reference-2006.00772</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-method-for-target-source-extraction-using-magnitude-spectrogram-as-reference-2006.00772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-method-for-target-source-extraction-using-magnitude-spectrogram-as-reference-2006.00772"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-recognize-code-switched-speech-without-forgetting-monolingual-speech-recognition-2006.00782</loc><lastmod>2020-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-recognize-code-switched-speech-without-forgetting-monolingual-speech-recognition-2006.00782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-recognize-code-switched-speech-without-forgetting-monolingual-speech-recognition-2006.00782"/></url>
<url><loc>https://scifaro.com/en/abs/a-time-scale-modification-dataset-with-subjective-quality-labels-2006.00848</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-time-scale-modification-dataset-with-subjective-quality-labels-2006.00848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-time-scale-modification-dataset-with-subjective-quality-labels-2006.00848"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-audio-generation-and-representation-learning-with-guided-adversarial-autoencoder-2006.00877</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-audio-generation-and-representation-learning-with-guided-adversarial-autoencoder-2006.00877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-audio-generation-and-representation-learning-with-guided-adversarial-autoencoder-2006.00877"/></url>
<url><loc>https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-using-gan-2006.01260</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-using-gan-2006.01260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-eeg-based-continuous-speech-recognition-using-gan-2006.01260"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-effect-of-speech-perception-in-eeg-based-speech-recognition-systems-2006.01261</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-effect-of-speech-perception-in-eeg-based-speech-recognition-systems-2006.01261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-effect-of-speech-perception-in-eeg-based-speech-recognition-systems-2006.01261"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-different-acoustic-features-from-eeg-and-towards-direct-synthesis-of-audio-waveform-from-eeg-2006.01262</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-different-acoustic-features-from-eeg-and-towards-direct-synthesis-of-audio-waveform-from-eeg-2006.01262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-different-acoustic-features-from-eeg-and-towards-direct-synthesis-of-audio-waveform-from-eeg-2006.01262"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-audiovisual-learning-of-sounds-with-weakly-labeled-data-2006.01595</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-audiovisual-learning-of-sounds-with-weakly-labeled-data-2006.01595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-audiovisual-learning-of-sounds-with-weakly-labeled-data-2006.01595"/></url>
<url><loc>https://scifaro.com/en/abs/dilated-u-net-based-approach-for-multichannel-speech-enhancement-from-first-order-ambisonics-recordings-2006.01708</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dilated-u-net-based-approach-for-multichannel-speech-enhancement-from-first-order-ambisonics-recordings-2006.01708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dilated-u-net-based-approach-for-multichannel-speech-enhancement-from-first-order-ambisonics-recordings-2006.01708"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speaker-diarization-with-speaker-wise-chain-rule-2006.01796</loc><lastmod>2020-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speaker-diarization-with-speaker-wise-chain-rule-2006.01796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speaker-diarization-with-speaker-wise-chain-rule-2006.01796"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-audio-attacks-on-asr-systems-with-dropout-uncertainty-2006.01906</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-audio-attacks-on-asr-systems-with-dropout-uncertainty-2006.01906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-audio-attacks-on-asr-systems-with-dropout-uncertainty-2006.01906"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-of-reverberant-spatial-sound-scenes-with-moving-sources-for-sound-event-localization-and-detection-2006.01919</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-of-reverberant-spatial-sound-scenes-with-moving-sources-for-sound-event-localization-and-detection-2006.01919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-of-reverberant-spatial-sound-scenes-with-moving-sources-for-sound-event-localization-and-detection-2006.01919"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-velocity-vector-for-retracing-the-multipath-propagation-2006.02099</loc><lastmod>2020-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-velocity-vector-for-retracing-the-multipath-propagation-2006.02099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-velocity-vector-for-retracing-the-multipath-propagation-2006.02099"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-deep-markov-model-for-unsupervised-speech-representation-learning-2006.02547</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-deep-markov-model-for-unsupervised-speech-representation-learning-2006.02547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-deep-markov-model-for-unsupervised-speech-representation-learning-2006.02547"/></url>
<url><loc>https://scifaro.com/en/abs/online-end-to-end-neural-diarization-with-speaker-tracing-buffer-2006.02616</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-end-to-end-neural-diarization-with-speaker-tracing-buffer-2006.02616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-end-to-end-neural-diarization-with-speaker-tracing-buffer-2006.02616"/></url>
<url><loc>https://scifaro.com/en/abs/multi-talker-asr-for-an-unknown-number-of-sources-joint-training-of-source-counting-separation-and-asr-2006.02786</loc><lastmod>2020-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-talker-asr-for-an-unknown-number-of-sources-joint-training-of-source-counting-separation-and-asr-2006.02786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-talker-asr-for-an-unknown-number-of-sources-joint-training-of-source-counting-separation-and-asr-2006.02786"/></url>
<url><loc>https://scifaro.com/en/abs/cstnet-contrastive-speech-translation-network-for-self-supervised-speech-representation-learning-2006.02814</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cstnet-contrastive-speech-translation-network-for-self-supervised-speech-representation-learning-2006.02814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cstnet-contrastive-speech-translation-network-for-self-supervised-speech-representation-learning-2006.02814"/></url>
<url><loc>https://scifaro.com/en/abs/constrained-variational-autoencoder-for-improving-eeg-based-speech-recognition-systems-2006.02902</loc><lastmod>2020-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/constrained-variational-autoencoder-for-improving-eeg-based-speech-recognition-systems-2006.02902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/constrained-variational-autoencoder-for-improving-eeg-based-speech-recognition-systems-2006.02902"/></url>
<url><loc>https://scifaro.com/en/abs/attention-and-encoder-decoder-based-models-for-transforming-articulatory-movements-at-different-speaking-rates-2006.03107</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-and-encoder-decoder-based-models-for-transforming-articulatory-movements-at-different-speaking-rates-2006.03107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-and-encoder-decoder-based-models-for-transforming-articulatory-movements-at-different-speaking-rates-2006.03107"/></url>
<url><loc>https://scifaro.com/en/abs/defense-for-black-box-attacks-on-anti-spoofing-models-by-self-supervised-learning-2006.03214</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defense-for-black-box-attacks-on-anti-spoofing-models-by-self-supervised-learning-2006.03214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defense-for-black-box-attacks-on-anti-spoofing-models-by-self-supervised-learning-2006.03214"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-rnn-t-for-open-domain-asr-2006.03411</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-rnn-t-for-open-domain-asr-2006.03411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-rnn-t-for-open-domain-asr-2006.03411"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-anomaly-detection-for-machine-sounds-based-on-image-transfer-learning-2006.03429</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-for-machine-sounds-based-on-image-transfer-learning-2006.03429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-anomaly-detection-for-machine-sounds-based-on-image-transfer-learning-2006.03429"/></url>
<url><loc>https://scifaro.com/en/abs/ap20-olr-challenge-three-tasks-and-their-baselines-2006.03473</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ap20-olr-challenge-three-tasks-and-their-baselines-2006.03473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ap20-olr-challenge-three-tasks-and-their-baselines-2006.03473"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-synthesis-of-hypo-and-hyperarticulated-speech-2006.04136</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-synthesis-of-hypo-and-hyperarticulated-speech-2006.04136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-synthesis-of-hypo-and-hyperarticulated-speech-2006.04136"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-phase-modeling-for-sparse-linear-prediction-of-speech-2006.04138</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-phase-modeling-for-sparse-linear-prediction-of-speech-2006.04138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-phase-modeling-for-sparse-linear-prediction-of-speech-2006.04138"/></url>
<url><loc>https://scifaro.com/en/abs/parametric-representation-for-singing-voice-synthesis-a-comparative-evaluation-2006.04142</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parametric-representation-for-singing-voice-synthesis-a-comparative-evaluation-2006.04142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parametric-representation-for-singing-voice-synthesis-a-comparative-evaluation-2006.04142"/></url>
<url><loc>https://scifaro.com/en/abs/vqvc-one-shot-voice-conversion-by-vector-quantization-and-u-net-architecture-2006.04154</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vqvc-one-shot-voice-conversion-by-vector-quantization-and-u-net-architecture-2006.04154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vqvc-one-shot-voice-conversion-by-vector-quantization-and-u-net-architecture-2006.04154"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-contrastive-learning-with-generalized-contrastive-loss-and-its-application-to-speaker-recognition-2006.04326</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-contrastive-learning-with-generalized-contrastive-loss-and-its-application-to-speaker-recognition-2006.04326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-contrastive-learning-with-generalized-contrastive-loss-and-its-application-to-speaker-recognition-2006.04326"/></url>
<url><loc>https://scifaro.com/en/abs/zero-resource-speech-synthesis-using-transcripts-derived-from-perceptual-acoustic-units-2006.04372</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-resource-speech-synthesis-using-transcripts-derived-from-perceptual-acoustic-units-2006.04372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-resource-speech-synthesis-using-transcripts-derived-from-perceptual-acoustic-units-2006.04372"/></url>
<url><loc>https://scifaro.com/en/abs/a-non-causal-fftnet-architecture-for-speech-enhancement-2006.04469</loc><lastmod>2020-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-non-causal-fftnet-architecture-for-speech-enhancement-2006.04469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-non-causal-fftnet-architecture-for-speech-enhancement-2006.04469"/></url>
<url><loc>https://scifaro.com/en/abs/fastspeech-2-fast-and-high-quality-end-to-end-text-to-speech-2006.04558</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastspeech-2-fast-and-high-quality-end-to-end-text-to-speech-2006.04558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastspeech-2-fast-and-high-quality-end-to-end-text-to-speech-2006.04558"/></url>
<url><loc>https://scifaro.com/en/abs/multispeech-multi-speaker-text-to-speech-with-transformer-2006.04664</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multispeech-multi-speaker-text-to-speech-with-transformer-2006.04664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multispeech-multi-speaker-text-to-speech-with-transformer-2006.04664"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-count-words-in-fluent-speech-enables-online-speech-recognition-2006.04928</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-count-words-in-fluent-speech-enables-online-speech-recognition-2006.04928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-count-words-in-fluent-speech-enables-online-speech-recognition-2006.04928"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effectiveness-of-neural-text-generation-based-data-augmentation-for-recognition-of-morphologically-rich-speech-2006.05129</loc><lastmod>2020-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effectiveness-of-neural-text-generation-based-data-augmentation-for-recognition-of-morphologically-rich-speech-2006.05129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effectiveness-of-neural-text-generation-based-data-augmentation-for-recognition-of-morphologically-rich-speech-2006.05129"/></url>
<url><loc>https://scifaro.com/en/abs/input-independent-attention-weights-are-expressive-enough-a-study-of-attention-in-self-supervised-audio-transformers-2006.05174</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/input-independent-attention-weights-are-expressive-enough-a-study-of-attention-in-self-supervised-audio-transformers-2006.05174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/input-independent-attention-weights-are-expressive-enough-a-study-of-attention-in-self-supervised-audio-transformers-2006.05174"/></url>
<url><loc>https://scifaro.com/en/abs/a-fully-recurrent-feature-extraction-for-single-channel-speech-enhancement-2006.05233</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fully-recurrent-feature-extraction-for-single-channel-speech-enhancement-2006.05233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fully-recurrent-feature-extraction-for-single-channel-speech-enhancement-2006.05233"/></url>
<url><loc>https://scifaro.com/en/abs/learning-not-to-discriminate-task-agnostic-learning-for-improving-monolingual-and-code-switched-speech-recognition-2006.05257</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-not-to-discriminate-task-agnostic-learning-for-improving-monolingual-and-code-switched-speech-recognition-2006.05257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-not-to-discriminate-task-agnostic-learning-for-improving-monolingual-and-code-switched-speech-recognition-2006.05257"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-markers-from-sustained-phonation-in-huntington-s-disease-2006.05365</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-markers-from-sustained-phonation-in-huntington-s-disease-2006.05365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-markers-from-sustained-phonation-in-huntington-s-disease-2006.05365"/></url>
<url><loc>https://scifaro.com/en/abs/improving-cross-lingual-transfer-learning-for-end-to-end-speech-recognition-with-speech-translation-2006.05474</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-cross-lingual-transfer-learning-for-end-to-end-speech-recognition-with-speech-translation-2006.05474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-cross-lingual-transfer-learning-for-end-to-end-speech-recognition-with-speech-translation-2006.05474"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-quality-and-generalizability-in-parameterized-neural-audio-effects-2006.05584</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-quality-and-generalizability-in-parameterized-neural-audio-effects-2006.05584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-quality-and-generalizability-in-parameterized-neural-audio-effects-2006.05584"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-using-recurrent-neural-networks-2006.05596</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-using-recurrent-neural-networks-2006.05596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-using-recurrent-neural-networks-2006.05596"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-replay-spoofing-aware-text-independent-speaker-verification-2006.05599</loc><lastmod>2020-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-replay-spoofing-aware-text-independent-speaker-verification-2006.05599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-replay-spoofing-aware-text-independent-speaker-verification-2006.05599"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-gan-high-fidelity-denoising-and-dereverberation-based-on-speech-deep-features-in-adversarial-networks-2006.05694</loc><lastmod>2020-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-gan-high-fidelity-denoising-and-dereverberation-based-on-speech-deep-features-in-adversarial-networks-2006.05694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-gan-high-fidelity-denoising-and-dereverberation-based-on-speech-deep-features-in-adversarial-networks-2006.05694"/></url>
<url><loc>https://scifaro.com/en/abs/listen-to-what-you-want-neural-network-based-universal-sound-selector-2006.05712</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-to-what-you-want-neural-network-based-universal-sound-selector-2006.05712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-to-what-you-want-neural-network-based-universal-sound-selector-2006.05712"/></url>
<url><loc>https://scifaro.com/en/abs/uniphore-s-submission-to-fearless-steps-challenge-phase-2-2006.05747</loc><lastmod>2020-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniphore-s-submission-to-fearless-steps-challenge-phase-2-2006.05747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniphore-s-submission-to-fearless-steps-challenge-phase-2-2006.05747"/></url>
<url><loc>https://scifaro.com/en/abs/third-dihard-challenge-evaluation-plan-2006.05815</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/third-dihard-challenge-evaluation-plan-2006.05815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/third-dihard-challenge-evaluation-plan-2006.05815"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-discussion-on-dcase2020-challenge-task2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2006.05822</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase2020-challenge-task2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2006.05822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase2020-challenge-task2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2006.05822"/></url>
<url><loc>https://scifaro.com/en/abs/an-objective-measure-of-quality-for-time-scale-modification-of-audio-2006.06153</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-objective-measure-of-quality-for-time-scale-modification-of-audio-2006.06153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-objective-measure-of-quality-for-time-scale-modification-of-audio-2006.06153"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-robustness-of-adversarial-samples-detection-for-automatic-speaker-verification-2006.06186</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-robustness-of-adversarial-samples-detection-for-automatic-speaker-verification-2006.06186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-robustness-of-adversarial-samples-detection-for-automatic-speaker-verification-2006.06186"/></url>
<url><loc>https://scifaro.com/en/abs/xiaoicesing-a-high-quality-and-integrated-singing-voice-synthesis-system-2006.06261</loc><lastmod>2020-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xiaoicesing-a-high-quality-and-integrated-singing-voice-synthesis-system-2006.06261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xiaoicesing-a-high-quality-and-integrated-singing-voice-synthesis-system-2006.06261"/></url>
<url><loc>https://scifaro.com/en/abs/deep-generative-models-for-musical-audio-synthesis-2006.06426</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-generative-models-for-musical-audio-synthesis-2006.06426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-generative-models-for-musical-audio-synthesis-2006.06426"/></url>
<url><loc>https://scifaro.com/en/abs/fastpitch-parallel-text-to-speech-with-pitch-prediction-2006.06873</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastpitch-parallel-text-to-speech-with-pitch-prediction-2006.06873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastpitch-parallel-text-to-speech-with-pitch-prediction-2006.06873"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-voice-conversion-based-on-source-to-target-direct-mapping-2006.06937</loc><lastmod>2020-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-based-on-source-to-target-direct-mapping-2006.06937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-voice-conversion-based-on-source-to-target-direct-mapping-2006.06937"/></url>
<url><loc>https://scifaro.com/en/abs/neural-voice-cloning-with-a-few-low-quality-samples-2006.06940</loc><lastmod>2020-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-voice-cloning-with-a-few-low-quality-samples-2006.06940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-voice-cloning-with-a-few-low-quality-samples-2006.06940"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adversarial-training-of-multi-speaker-tts-2006.06942</loc><lastmod>2020-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adversarial-training-of-multi-speaker-tts-2006.06942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adversarial-training-of-multi-speaker-tts-2006.06942"/></url>
<url><loc>https://scifaro.com/en/abs/generic-indic-text-to-speech-synthesisers-with-rapid-adaptation-in-an-end-to-end-framework-2006.06971</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generic-indic-text-to-speech-synthesisers-with-rapid-adaptation-in-an-end-to-end-framework-2006.06971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generic-indic-text-to-speech-synthesisers-with-rapid-adaptation-in-an-end-to-end-framework-2006.06971"/></url>
<url><loc>https://scifaro.com/en/abs/monolingual-data-selection-analysis-for-english-mandarin-hybrid-code-switching-speech-recognition-2006.07094</loc><lastmod>2020-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monolingual-data-selection-analysis-for-english-mandarin-hybrid-code-switching-speech-recognition-2006.07094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monolingual-data-selection-analysis-for-english-mandarin-hybrid-code-switching-speech-recognition-2006.07094"/></url>
<url><loc>https://scifaro.com/en/abs/se-melgan-speaker-agnostic-rapid-speech-enhancement-2006.07637</loc><lastmod>2020-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/se-melgan-speaker-agnostic-rapid-speech-enhancement-2006.07637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/se-melgan-speaker-agnostic-rapid-speech-enhancement-2006.07637"/></url>
<url><loc>https://scifaro.com/en/abs/the-jhu-multi-microphone-multi-speaker-asr-system-for-the-chime-6-challenge-2006.07898</loc><lastmod>2020-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-jhu-multi-microphone-multi-speaker-asr-system-for-the-chime-6-challenge-2006.07898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-jhu-multi-microphone-multi-speaker-asr-system-for-the-chime-6-challenge-2006.07898"/></url>
<url><loc>https://scifaro.com/en/abs/uwspeech-speech-to-speech-translation-for-unwritten-languages-2006.07926</loc><lastmod>2020-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uwspeech-speech-to-speech-translation-for-unwritten-languages-2006.07926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uwspeech-speech-to-speech-translation-for-unwritten-languages-2006.07926"/></url>
<url><loc>https://scifaro.com/en/abs/solos-a-dataset-for-audio-visual-music-analysis-2006.07931</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/solos-a-dataset-for-audio-visual-music-analysis-2006.07931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/solos-a-dataset-for-audio-visual-music-analysis-2006.07931"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-recognition-in-audio-and-video-using-deep-neural-networks-2006.08129</loc><lastmod>2020-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-recognition-in-audio-and-video-using-deep-neural-networks-2006.08129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-recognition-in-audio-and-video-using-deep-neural-networks-2006.08129"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-end-to-end-asr-for-openstt-russian-open-speech-to-text-dataset-2006.08274</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-end-to-end-asr-for-openstt-russian-open-speech-to-text-dataset-2006.08274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-end-to-end-asr-for-openstt-russian-open-speech-to-text-dataset-2006.08274"/></url>
<url><loc>https://scifaro.com/en/abs/an-iterative-graph-spectral-subtraction-method-for-speech-enhancement-2006.08497</loc><lastmod>2020-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-iterative-graph-spectral-subtraction-method-for-speech-enhancement-2006.08497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-iterative-graph-spectral-subtraction-method-for-speech-enhancement-2006.08497"/></url>
<url><loc>https://scifaro.com/en/abs/regularized-forward-backward-decoder-for-attention-models-2006.08506</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regularized-forward-backward-decoder-for-attention-models-2006.08506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regularized-forward-backward-decoder-for-attention-models-2006.08506"/></url>
<url><loc>https://scifaro.com/en/abs/robust-sound-source-tracking-using-srp-phat-and-3d-convolutional-neural-networks-2006.09006</loc><lastmod>2020-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-sound-source-tracking-using-srp-phat-and-3d-convolutional-neural-networks-2006.09006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-sound-source-tracking-using-srp-phat-and-3d-convolutional-neural-networks-2006.09006"/></url>
<url><loc>https://scifaro.com/en/abs/quantization-of-acoustic-model-parameters-in-automatic-speech-recognition-framework-2006.09054</loc><lastmod>2020-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantization-of-acoustic-model-parameters-in-automatic-speech-recognition-framework-2006.09054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantization-of-acoustic-model-parameters-in-automatic-speech-recognition-framework-2006.09054"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-representation-learning-for-private-speech-generation-2006.09114</loc><lastmod>2020-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-representation-learning-for-private-speech-generation-2006.09114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-representation-learning-for-private-speech-generation-2006.09114"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-representations-for-audio-synthesis-using-generative-adversarial-networks-2006.09266</loc><lastmod>2020-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-representations-for-audio-synthesis-using-generative-adversarial-networks-2006.09266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-representations-for-audio-synthesis-using-generative-adversarial-networks-2006.09266"/></url>
<url><loc>https://scifaro.com/en/abs/visual-attention-for-musical-instrument-recognition-2006.09640</loc><lastmod>2020-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-attention-for-musical-instrument-recognition-2006.09640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-attention-for-musical-instrument-recognition-2006.09640"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-neural-network-for-audio-classification-with-a-classifier-attention-mechanism-2006.09815</loc><lastmod>2020-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-neural-network-for-audio-classification-with-a-classifier-attention-mechanism-2006.09815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-neural-network-for-audio-classification-with-a-classifier-attention-mechanism-2006.09815"/></url>
<url><loc>https://scifaro.com/en/abs/generative-modelling-for-controllable-audio-synthesis-of-expressive-piano-performance-2006.09833</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-modelling-for-controllable-audio-synthesis-of-expressive-piano-performance-2006.09833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-modelling-for-controllable-audio-synthesis-of-expressive-piano-performance-2006.09833"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-networks-for-music-generation-2006.09838</loc><lastmod>2020-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-networks-for-music-generation-2006.09838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-networks-for-music-generation-2006.09838"/></url>
<url><loc>https://scifaro.com/en/abs/are-you-wearing-a-mask-improving-mask-detection-from-speech-using-augmentation-by-cycle-consistent-gans-2006.10147</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-you-wearing-a-mask-improving-mask-detection-from-speech-using-augmentation-by-cycle-consistent-gans-2006.10147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-you-wearing-a-mask-improving-mask-detection-from-speech-using-augmentation-by-cycle-consistent-gans-2006.10147"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-objective-scores-of-a-speech-enhancement-model-by-metricgan-post-processing-2006.10296</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-objective-scores-of-a-speech-enhancement-model-by-metricgan-post-processing-2006.10296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-objective-scores-of-a-speech-enhancement-model-by-metricgan-post-processing-2006.10296"/></url>
<url><loc>https://scifaro.com/en/abs/adversarially-trained-multi-singer-sequence-to-sequence-singing-synthesizer-2006.10317</loc><lastmod>2020-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarially-trained-multi-singer-sequence-to-sequence-singing-synthesizer-2006.10317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarially-trained-multi-singer-sequence-to-sequence-singing-synthesizer-2006.10317"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-2006.10388</loc><lastmod>2020-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-2006.10388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-2006.10388"/></url>
<url><loc>https://scifaro.com/en/abs/self-and-mixed-attention-decoder-with-deep-acoustic-structure-for-transformer-based-lvcsr-2006.10407</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-and-mixed-attention-decoder-with-deep-acoustic-structure-for-transformer-based-lvcsr-2006.10407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-and-mixed-attention-decoder-with-deep-acoustic-structure-for-transformer-based-lvcsr-2006.10407"/></url>
<url><loc>https://scifaro.com/en/abs/multi-encoder-decoder-transformer-for-code-switching-speech-recognition-2006.10414</loc><lastmod>2020-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-encoder-decoder-transformer-for-code-switching-speech-recognition-2006.10414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-encoder-decoder-transformer-for-code-switching-speech-recognition-2006.10414"/></url>
<url><loc>https://scifaro.com/en/abs/deep-dense-and-convolutional-autoencoders-for-unsupervised-anomaly-detection-in-machine-condition-sounds-2006.10417</loc><lastmod>2020-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-dense-and-convolutional-autoencoders-for-unsupervised-anomaly-detection-in-machine-condition-sounds-2006.10417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-dense-and-convolutional-autoencoders-for-unsupervised-anomaly-detection-in-machine-condition-sounds-2006.10417"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speaker-counting-speech-recognition-and-speaker-identification-for-overlapped-speech-of-any-number-of-speakers-2006.10930</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speaker-counting-speech-recognition-and-speaker-identification-for-overlapped-speech-of-any-number-of-speakers-2006.10930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speaker-counting-speech-recognition-and-speaker-identification-for-overlapped-speech-of-any-number-of-speakers-2006.10930"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-active-learning-for-speech-recognition-with-noisy-pseudo-labeled-samples-2006.11021</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-active-learning-for-speech-recognition-with-noisy-pseudo-labeled-samples-2006.11021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-active-learning-for-speech-recognition-with-noisy-pseudo-labeled-samples-2006.11021"/></url>
<url><loc>https://scifaro.com/en/abs/towards-reliable-real-time-opera-tracking-combining-alignment-with-audio-event-detectors-to-increase-robustness-2006.11033</loc><lastmod>2020-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-reliable-real-time-opera-tracking-combining-alignment-with-audio-event-detectors-to-increase-robustness-2006.11033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-reliable-real-time-opera-tracking-combining-alignment-with-audio-event-detectors-to-increase-robustness-2006.11033"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-based-voice-activity-detection-exploiting-fully-convolutional-networks-with-multi-branched-encoders-2006.11139</loc><lastmod>2020-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-based-voice-activity-detection-exploiting-fully-convolutional-networks-with-multi-branched-encoders-2006.11139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-based-voice-activity-detection-exploiting-fully-convolutional-networks-with-multi-branched-encoders-2006.11139"/></url>
<url><loc>https://scifaro.com/en/abs/clarity-machine-learning-challenges-to-revolutionise-hearing-device-processing-2006.11140</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clarity-machine-learning-challenges-to-revolutionise-hearing-device-processing-2006.11140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clarity-machine-learning-challenges-to-revolutionise-hearing-device-processing-2006.11140"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioned-acoustic-to-articulatory-inversion-using-x-vectors-2006.11536</loc><lastmod>2020-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioned-acoustic-to-articulatory-inversion-using-x-vectors-2006.11536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioned-acoustic-to-articulatory-inversion-using-x-vectors-2006.11536"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-and-multilingual-mixlingual-speech-driven-talking-head-generation-using-phonetic-posteriorgrams-2006.11610</loc><lastmod>2020-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-and-multilingual-mixlingual-speech-driven-talking-head-generation-using-phonetic-posteriorgrams-2006.11610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-and-multilingual-mixlingual-speech-driven-talking-head-generation-using-phonetic-posteriorgrams-2006.11610"/></url>
<url><loc>https://scifaro.com/en/abs/human-emotion-detection-from-audio-and-video-signals-2006.11871</loc><lastmod>2020-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-emotion-detection-from-audio-and-video-signals-2006.11871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-emotion-detection-from-audio-and-video-signals-2006.11871"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-using-activity-coupled-cartesian-doa-vector-and-rd3net-2006.12014</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-activity-coupled-cartesian-doa-vector-and-rd3net-2006.12014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-using-activity-coupled-cartesian-doa-vector-and-rd3net-2006.12014"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-representations-improve-end-to-end-speech-translation-2006.12124</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-representations-improve-end-to-end-speech-translation-2006.12124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-representations-improve-end-to-end-speech-translation-2006.12124"/></url>
<url><loc>https://scifaro.com/en/abs/articulatory-wavenet-autoregressive-model-for-acoustic-to-articulatory-inversion-2006.12594</loc><lastmod>2020-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/articulatory-wavenet-autoregressive-model-for-acoustic-to-articulatory-inversion-2006.12594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/articulatory-wavenet-autoregressive-model-for-acoustic-to-articulatory-inversion-2006.12594"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-sound-separation-using-mixture-invariant-training-2006.12701</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-sound-separation-using-mixture-invariant-training-2006.12701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-sound-separation-using-mixture-invariant-training-2006.12701"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-enhancement-in-the-waveform-domain-2006.12847</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-enhancement-in-the-waveform-domain-2006.12847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-enhancement-in-the-waveform-domain-2006.12847"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-online-noise-reduction-on-embedded-devices-using-hierarchical-recurrent-neural-networks-2006.13067</loc><lastmod>2020-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-online-noise-reduction-on-embedded-devices-using-hierarchical-recurrent-neural-networks-2006.13067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-online-noise-reduction-on-embedded-devices-using-hierarchical-recurrent-neural-networks-2006.13067"/></url>
<url><loc>https://scifaro.com/en/abs/clc-complex-linear-coding-for-the-dns-2020-challenge-2006.13077</loc><lastmod>2020-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clc-complex-linear-coding-for-the-dns-2020-challenge-2006.13077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clc-complex-linear-coding-for-the-dns-2020-challenge-2006.13077"/></url>
<url><loc>https://scifaro.com/en/abs/embodied-self-supervised-learning-by-coordinated-sampling-and-training-2006.13350</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embodied-self-supervised-learning-by-coordinated-sampling-and-training-2006.13350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embodied-self-supervised-learning-by-coordinated-sampling-and-training-2006.13350"/></url>
<url><loc>https://scifaro.com/en/abs/face-to-music-translation-using-a-distance-preserving-generative-adversarial-network-with-an-auxiliary-discriminator-2006.13469</loc><lastmod>2020-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/face-to-music-translation-using-a-distance-preserving-generative-adversarial-network-with-an-auxiliary-discriminator-2006.13469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/face-to-music-translation-using-a-distance-preserving-generative-adversarial-network-with-an-auxiliary-discriminator-2006.13469"/></url>
<url><loc>https://scifaro.com/en/abs/black-box-adaptation-of-asr-for-accented-speech-2006.13519</loc><lastmod>2020-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/black-box-adaptation-of-asr-for-accented-speech-2006.13519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/black-box-adaptation-of-asr-for-accented-speech-2006.13519"/></url>
<url><loc>https://scifaro.com/en/abs/multi-path-rnn-for-hierarchical-modeling-of-long-sequential-data-and-its-application-to-speaker-stream-separation-2006.13579</loc><lastmod>2020-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-path-rnn-for-hierarchical-modeling-of-long-sequential-data-and-its-application-to-speaker-stream-separation-2006.13579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-path-rnn-for-hierarchical-modeling-of-long-sequential-data-and-its-application-to-speaker-stream-separation-2006.13579"/></url>
<url><loc>https://scifaro.com/en/abs/gamma-boltzmann-machine-for-simultaneously-modeling-linear-and-log-amplitude-spectra-2006.13590</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gamma-boltzmann-machine-for-simultaneously-modeling-linear-and-log-amplitude-spectra-2006.13590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gamma-boltzmann-machine-for-simultaneously-modeling-linear-and-log-amplitude-spectra-2006.13590"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-based-distance-estimation-for-geometry-calibration-in-acoustic-sensor-networks-2006.13769</loc><lastmod>2020-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-based-distance-estimation-for-geometry-calibration-in-acoustic-sensor-networks-2006.13769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-based-distance-estimation-for-geometry-calibration-in-acoustic-sensor-networks-2006.13769"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditional-chain-model-for-speech-separation-and-extraction-2006.14149</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditional-chain-model-for-speech-separation-and-extraction-2006.14149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditional-chain-model-for-speech-separation-and-extraction-2006.14149"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-multi-sequence-learning-via-conditional-chain-mapping-for-mixture-signals-2006.14150</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-multi-sequence-learning-via-conditional-chain-mapping-for-mixture-signals-2006.14150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-multi-sequence-learning-via-conditional-chain-mapping-for-mixture-signals-2006.14150"/></url>
<url><loc>https://scifaro.com/en/abs/dialogue-enhancement-in-object-based-audio-evaluating-the-benefit-on-people-above-65-2006.14282</loc><lastmod>2020-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialogue-enhancement-in-object-based-audio-evaluating-the-benefit-on-people-above-65-2006.14282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialogue-enhancement-in-object-based-audio-evaluating-the-benefit-on-people-above-65-2006.14282"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-transformer-asr-with-blockwise-synchronous-beam-search-2006.14941</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-transformer-asr-with-blockwise-synchronous-beam-search-2006.14941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-transformer-asr-with-blockwise-synchronous-beam-search-2006.14941"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-using-unsupervised-and-semi-supervised-autoencoders-and-gammatone-audio-representation-2006.15321</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-unsupervised-and-semi-supervised-autoencoders-and-gammatone-audio-representation-2006.15321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-using-unsupervised-and-semi-supervised-autoencoders-and-gammatone-audio-representation-2006.15321"/></url>
<url><loc>https://scifaro.com/en/abs/listen-carefully-and-tell-an-audio-captioning-system-based-on-residual-learning-and-gammatone-audio-representation-2006.15406</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-carefully-and-tell-an-audio-captioning-system-based-on-residual-learning-and-gammatone-audio-representation-2006.15406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-carefully-and-tell-an-audio-captioning-system-based-on-residual-learning-and-gammatone-audio-representation-2006.15406"/></url>
<url><loc>https://scifaro.com/en/abs/prosodic-prominence-and-boundaries-in-sequence-to-sequence-speech-synthesis-2006.15967</loc><lastmod>2020-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosodic-prominence-and-boundaries-in-sequence-to-sequence-speech-synthesis-2006.15967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosodic-prominence-and-boundaries-in-sequence-to-sequence-speech-synthesis-2006.15967"/></url>
<url><loc>https://scifaro.com/en/abs/a-speech-enhancement-algorithm-based-on-non-negative-hidden-markov-model-and-kullback-leibler-divergence-2006.16689</loc><lastmod>2020-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speech-enhancement-algorithm-based-on-non-negative-hidden-markov-model-and-kullback-leibler-divergence-2006.16689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speech-enhancement-algorithm-based-on-non-negative-hidden-markov-model-and-kullback-leibler-divergence-2006.16689"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-frequency-lstm-an-efficient-frontend-for-automatic-speech-recognition-2007.00131</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-frequency-lstm-an-efficient-frontend-for-automatic-speech-recognition-2007.00131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-frequency-lstm-an-efficient-frontend-for-automatic-speech-recognition-2007.00131"/></url>
<url><loc>https://scifaro.com/en/abs/whole-word-segmental-speech-recognition-with-acoustic-word-embeddings-2007.00183</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whole-word-segmental-speech-recognition-with-acoustic-word-embeddings-2007.00183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whole-word-segmental-speech-recognition-with-acoustic-word-embeddings-2007.00183"/></url>
<url><loc>https://scifaro.com/en/abs/personalization-of-hearing-aid-compression-by-human-in-loop-deep-reinforcement-learning-2007.00192</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalization-of-hearing-aid-compression-by-human-in-loop-deep-reinforcement-learning-2007.00192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalization-of-hearing-aid-compression-by-human-in-loop-deep-reinforcement-learning-2007.00192"/></url>
<url><loc>https://scifaro.com/en/abs/a-transformer-based-audio-captioning-model-with-keyword-estimation-2007.00222</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transformer-based-audio-captioning-model-with-keyword-estimation-2007.00222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transformer-based-audio-captioning-model-with-keyword-estimation-2007.00222"/></url>
<url><loc>https://scifaro.com/en/abs/the-ntt-dcase2020-challenge-task-6-system-automated-audio-captioning-with-keywords-and-sentence-length-estimation-2007.00225</loc><lastmod>2020-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ntt-dcase2020-challenge-task-6-system-automated-audio-captioning-with-keywords-and-sentence-length-estimation-2007.00225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ntt-dcase2020-challenge-task-6-system-automated-audio-captioning-with-keywords-and-sentence-length-estimation-2007.00225"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-time-domain-deep-attractor-network-with-two-stream-architectures-in-a-reverberant-environment-2007.00272</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-time-domain-deep-attractor-network-with-two-stream-architectures-in-a-reverberant-environment-2007.00272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-time-domain-deep-attractor-network-with-two-stream-architectures-in-a-reverberant-environment-2007.00272"/></url>
<url><loc>https://scifaro.com/en/abs/instantaneous-psd-estimation-for-speech-enhancement-based-on-generalized-principal-components-2007.00542</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instantaneous-psd-estimation-for-speech-enhancement-based-on-generalized-principal-components-2007.00542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instantaneous-psd-estimation-for-speech-enhancement-based-on-generalized-principal-components-2007.00542"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-and-gpt-2-synthetic-speech-transfer-learning-for-speaker-recognition-to-overcome-data-scarcity-2007.00659</loc><lastmod>2020-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-and-gpt-2-synthetic-speech-transfer-learning-for-speaker-recognition-to-overcome-data-scarcity-2007.00659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-and-gpt-2-synthetic-speech-transfer-learning-for-speaker-recognition-to-overcome-data-scarcity-2007.00659"/></url>
<url><loc>https://scifaro.com/en/abs/automated-empathy-detection-for-oncology-encounters-2007.00809</loc><lastmod>2020-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-empathy-detection-for-oncology-encounters-2007.00809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-empathy-detection-for-oncology-encounters-2007.00809"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-nmf-cnn-for-sound-event-detection-2007.00908</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-nmf-cnn-for-sound-event-detection-2007.00908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-nmf-cnn-for-sound-event-detection-2007.00908"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-sound-event-detection-based-on-convolutional-recurrent-neural-networks-with-semi-supervised-loss-function-for-dcase-challenge-2020-task-4-2007.00947</loc><lastmod>2020-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-based-on-convolutional-recurrent-neural-networks-with-semi-supervised-loss-function-for-dcase-challenge-2020-task-4-2007.00947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-sound-event-detection-based-on-convolutional-recurrent-neural-networks-with-semi-supervised-loss-function-for-dcase-challenge-2020-task-4-2007.00947"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmenting-contrastive-learning-of-speech-representations-in-the-time-domain-2007.00991</loc><lastmod>2020-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmenting-contrastive-learning-of-speech-representations-in-the-time-domain-2007.00991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmenting-contrastive-learning-of-speech-representations-in-the-time-domain-2007.00991"/></url>
<url><loc>https://scifaro.com/en/abs/online-supervised-acoustic-system-identification-exploiting-prelearned-local-affine-subspace-models-2007.01543</loc><lastmod>2020-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-supervised-acoustic-system-identification-exploiting-prelearned-local-affine-subspace-models-2007.01543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-supervised-acoustic-system-identification-exploiting-prelearned-local-affine-subspace-models-2007.01543"/></url>
<url><loc>https://scifaro.com/en/abs/distortionless-multi-channel-target-speech-enhancement-for-overlapped-speech-recognition-2007.01566</loc><lastmod>2020-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distortionless-multi-channel-target-speech-enhancement-for-overlapped-speech-recognition-2007.01566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distortionless-multi-channel-target-speech-enhancement-for-overlapped-speech-recognition-2007.01566"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-adaptation-control-for-supervised-acoustic-system-identification-exploiting-a-noise-dictionary-2007.01579</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-adaptation-control-for-supervised-acoustic-system-identification-exploiting-a-noise-dictionary-2007.01579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-adaptation-control-for-supervised-acoustic-system-identification-exploiting-a-noise-dictionary-2007.01579"/></url>
<url><loc>https://scifaro.com/en/abs/pretrained-semantic-speech-embeddings-for-end-to-end-spoken-language-understanding-via-cross-modal-teacher-student-learning-2007.01836</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretrained-semantic-speech-embeddings-for-end-to-end-spoken-language-understanding-via-cross-modal-teacher-student-learning-2007.01836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretrained-semantic-speech-embeddings-for-end-to-end-spoken-language-understanding-via-cross-modal-teacher-student-learning-2007.01836"/></url>
<url><loc>https://scifaro.com/en/abs/resnext-and-res2net-structures-for-speaker-verification-2007.02480</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resnext-and-res2net-structures-for-speaker-verification-2007.02480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resnext-and-res2net-structures-for-speaker-verification-2007.02480"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-sub-sampling-of-audio-feature-sequences-for-automated-audio-captioning-2007.02676</loc><lastmod>2020-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-sub-sampling-of-audio-feature-sequences-for-automated-audio-captioning-2007.02676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-sub-sampling-of-audio-feature-sequences-for-automated-audio-captioning-2007.02676"/></url>
<url><loc>https://scifaro.com/en/abs/depthwise-separable-convolutions-versus-recurrent-neural-networks-for-monaural-singing-voice-separation-2007.02683</loc><lastmod>2020-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/depthwise-separable-convolutions-versus-recurrent-neural-networks-for-monaural-singing-voice-separation-2007.02683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/depthwise-separable-convolutions-versus-recurrent-neural-networks-for-monaural-singing-voice-separation-2007.02683"/></url>
<url><loc>https://scifaro.com/en/abs/massively-multilingual-asr-50-languages-1-model-1-billion-parameters-2007.03001</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/massively-multilingual-asr-50-languages-1-model-1-billion-parameters-2007.03001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/massively-multilingual-asr-50-languages-1-model-1-billion-parameters-2007.03001"/></url>
<url><loc>https://scifaro.com/en/abs/multi-tones-phase-coding-mtpc-of-interaural-time-difference-by-spiking-neural-network-2007.03274</loc><lastmod>2020-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-tones-phase-coding-mtpc-of-interaural-time-difference-by-spiking-neural-network-2007.03274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-tones-phase-coding-mtpc-of-interaural-time-difference-by-spiking-neural-network-2007.03274"/></url>
<url><loc>https://scifaro.com/en/abs/x-vectors-new-quantitative-biomarkers-for-early-parkinson-s-disease-detection-from-speech-2007.03599</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-vectors-new-quantitative-biomarkers-for-early-parkinson-s-disease-detection-from-speech-2007.03599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-vectors-new-quantitative-biomarkers-for-early-parkinson-s-disease-detection-from-speech-2007.03599"/></url>
<url><loc>https://scifaro.com/en/abs/surveying-off-board-and-extra-vehicular-monitoring-and-progress-towards-pervasive-diagnostics-2007.03759</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surveying-off-board-and-extra-vehicular-monitoring-and-progress-towards-pervasive-diagnostics-2007.03759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surveying-off-board-and-extra-vehicular-monitoring-and-progress-towards-pervasive-diagnostics-2007.03759"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-end-to-end-bilingual-asr-systems-with-joint-language-identification-2007.03900</loc><lastmod>2020-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-end-to-end-bilingual-asr-systems-with-joint-language-identification-2007.03900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-end-to-end-bilingual-asr-systems-with-joint-language-identification-2007.03900"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speech-representations-from-raw-audio-by-joint-audiovisual-self-supervision-2007.04134</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speech-representations-from-raw-audio-by-joint-audiovisual-self-supervision-2007.04134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speech-representations-from-raw-audio-by-joint-audiovisual-self-supervision-2007.04134"/></url>
<url><loc>https://scifaro.com/en/abs/deepsinger-singing-voice-synthesis-with-data-mined-from-the-web-2007.04590</loc><lastmod>2020-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepsinger-singing-voice-synthesis-with-data-mined-from-the-web-2007.04590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepsinger-singing-voice-synthesis-with-data-mined-from-the-web-2007.04590"/></url>
<url><loc>https://scifaro.com/en/abs/capturing-scattered-discriminative-information-using-a-deep-architecture-in-acoustic-scene-classification-2007.04631</loc><lastmod>2020-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capturing-scattered-discriminative-information-using-a-deep-architecture-in-acoustic-scene-classification-2007.04631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capturing-scattered-discriminative-information-using-a-deep-architecture-in-acoustic-scene-classification-2007.04631"/></url>
<url><loc>https://scifaro.com/en/abs/gated-recurrent-context-softmax-free-attention-for-online-encoder-decoder-speech-recognition-2007.05214</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gated-recurrent-context-softmax-free-attention-for-online-encoder-decoder-speech-recognition-2007.05214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gated-recurrent-context-softmax-free-attention-for-online-encoder-decoder-speech-recognition-2007.05214"/></url>
<url><loc>https://scifaro.com/en/abs/id-conditioned-auto-encoder-for-unsupervised-anomaly-detection-2007.05314</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/id-conditioned-auto-encoder-for-unsupervised-anomaly-detection-2007.05314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/id-conditioned-auto-encoder-for-unsupervised-anomaly-detection-2007.05314"/></url>
<url><loc>https://scifaro.com/en/abs/darf-a-data-reduced-fade-version-for-simulations-of-speech-recognition-thresholds-with-real-hearing-aids-2007.05378</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/darf-a-data-reduced-fade-version-for-simulations-of-speech-recognition-thresholds-with-real-hearing-aids-2007.05378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/darf-a-data-reduced-fade-version-for-simulations-of-speech-recognition-thresholds-with-real-hearing-aids-2007.05378"/></url>
<url><loc>https://scifaro.com/en/abs/quasi-periodic-wavenet-an-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.05663</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quasi-periodic-wavenet-an-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.05663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quasi-periodic-wavenet-an-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.05663"/></url>
<url><loc>https://scifaro.com/en/abs/fast-griffin-lim-based-waveform-generation-strategy-for-text-to-speech-synthesis-2007.05764</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-griffin-lim-based-waveform-generation-strategy-for-text-to-speech-synthesis-2007.05764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-griffin-lim-based-waveform-generation-strategy-for-text-to-speech-synthesis-2007.05764"/></url>
<url><loc>https://scifaro.com/en/abs/the-asru-2019-mandarin-english-code-switching-speech-recognition-challenge-open-datasets-tracks-methods-and-results-2007.05916</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-asru-2019-mandarin-english-code-switching-speech-recognition-challenge-open-datasets-tracks-methods-and-results-2007.05916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-asru-2019-mandarin-english-code-switching-speech-recognition-challenge-open-datasets-tracks-methods-and-results-2007.05916"/></url>
<url><loc>https://scifaro.com/en/abs/tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-fundamentals-2007.05979</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-fundamentals-2007.05979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tandem-assessment-of-spoofing-countermeasures-and-automatic-speaker-verification-fundamentals-2007.05979"/></url>
<url><loc>https://scifaro.com/en/abs/nisp-a-multi-lingual-multi-accent-dataset-for-speaker-profiling-2007.06021</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nisp-a-multi-lingual-multi-accent-dataset-for-speaker-profiling-2007.06021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nisp-a-multi-lingual-multi-accent-dataset-for-speaker-profiling-2007.06021"/></url>
<url><loc>https://scifaro.com/en/abs/tera-self-supervised-learning-of-transformer-encoder-representation-for-speech-2007.06028</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tera-self-supervised-learning-of-transformer-encoder-representation-for-speech-2007.06028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tera-self-supervised-learning-of-transformer-encoder-representation-for-speech-2007.06028"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-language-identification-with-multilingual-capsnet-model-2007.06078</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-language-identification-with-multilingual-capsnet-model-2007.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-language-identification-with-multilingual-capsnet-model-2007.06078"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-neural-networks-jamming-on-the-beat-2007.06284</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-neural-networks-jamming-on-the-beat-2007.06284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-neural-networks-jamming-on-the-beat-2007.06284"/></url>
<url><loc>https://scifaro.com/en/abs/vector-quantized-timbre-representation-2007.06349</loc><lastmod>2020-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vector-quantized-timbre-representation-2007.06349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vector-quantized-timbre-representation-2007.06349"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-lyrics-transcription-using-dilated-convolutional-neural-networks-with-self-attention-2007.06486</loc><lastmod>2020-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-lyrics-transcription-using-dilated-convolutional-neural-networks-with-self-attention-2007.06486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-lyrics-transcription-using-dilated-convolutional-neural-networks-with-self-attention-2007.06486"/></url>
<url><loc>https://scifaro.com/en/abs/sudo-rm-rf-efficient-networks-for-universal-audio-source-separation-2007.06833</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sudo-rm-rf-efficient-networks-for-universal-audio-source-separation-2007.06833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sudo-rm-rf-efficient-networks-for-universal-audio-source-separation-2007.06833"/></url>
<url><loc>https://scifaro.com/en/abs/deep-transformer-based-data-augmentation-with-subword-units-for-morphologically-rich-online-asr-2007.06949</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-transformer-based-data-augmentation-with-subword-units-for-morphologically-rich-online-asr-2007.06949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-transformer-based-data-augmentation-with-subword-units-for-morphologically-rich-online-asr-2007.06949"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-and-an-extensive-evaluation-of-popular-audio-declipping-methods-2007.07663</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-and-an-extensive-evaluation-of-popular-audio-declipping-methods-2007.07663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-and-an-extensive-evaluation-of-popular-audio-declipping-methods-2007.07663"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-domain-balanced-hard-prototype-mining-and-language-dependent-score-normalization-2007.07689</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-domain-balanced-hard-prototype-mining-and-language-dependent-score-normalization-2007.07689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-speaker-verification-with-domain-balanced-hard-prototype-mining-and-language-dependent-score-normalization-2007.07689"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-of-convolutional-neural-networks-for-audio-classification-2007.07966</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-of-convolutional-neural-networks-for-audio-classification-2007.07966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-of-convolutional-neural-networks-for-audio-classification-2007.07966"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-enhanced-speaker-enrollment-for-text-dependent-speaker-verification-2007.08004</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-enhanced-speaker-enrollment-for-text-dependent-speaker-verification-2007.08004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-enhanced-speaker-enrollment-for-text-dependent-speaker-verification-2007.08004"/></url>
<url><loc>https://scifaro.com/en/abs/xiaomingbot-a-multilingual-robot-news-reporter-2007.08005</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xiaomingbot-a-multilingual-robot-news-reporter-2007.08005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xiaomingbot-a-multilingual-robot-news-reporter-2007.08005"/></url>
<url><loc>https://scifaro.com/en/abs/translate-reverberated-speech-to-anechoic-ones-speech-dereverberation-with-bert-2007.08052</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/translate-reverberated-speech-to-anechoic-ones-speech-dereverberation-with-bert-2007.08052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/translate-reverberated-speech-to-anechoic-ones-speech-dereverberation-with-bert-2007.08052"/></url>
<url><loc>https://scifaro.com/en/abs/audio-tagging-by-cross-filtering-noisy-labels-2007.08165</loc><lastmod>2020-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-tagging-by-cross-filtering-noisy-labels-2007.08165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-tagging-by-cross-filtering-noisy-labels-2007.08165"/></url>
<url><loc>https://scifaro.com/en/abs/neural-mos-prediction-for-synthesized-speech-using-multi-task-learning-with-spoofing-detection-and-spoofing-type-classification-2007.08267</loc><lastmod>2020-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-mos-prediction-for-synthesized-speech-using-multi-task-learning-with-spoofing-detection-and-spoofing-type-classification-2007.08267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-mos-prediction-for-synthesized-speech-using-multi-task-learning-with-spoofing-detection-and-spoofing-type-classification-2007.08267"/></url>
<url><loc>https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-based-on-two-stage-categorization-and-data-augmentation-2007.08389</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-based-on-two-stage-categorization-and-data-augmentation-2007.08389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/device-robust-acoustic-scene-classification-based-on-two-stage-categorization-and-data-augmentation-2007.08389"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2007.08818</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2007.08818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2007.08818"/></url>
<url><loc>https://scifaro.com/en/abs/ctc-segmentation-of-large-corpora-for-german-end-to-end-speech-recognition-2007.09127</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctc-segmentation-of-large-corpora-for-german-end-to-end-speech-recognition-2007.09127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctc-segmentation-of-large-corpora-for-german-end-to-end-speech-recognition-2007.09127"/></url>
<url><loc>https://scifaro.com/en/abs/skipconvnet-skip-convolutional-neural-network-for-speech-dereverberation-using-optimally-smoothed-spectral-mapping-2007.09131</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skipconvnet-skip-convolutional-neural-network-for-speech-dereverberation-using-optimally-smoothed-spectral-mapping-2007.09131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skipconvnet-skip-convolutional-neural-network-for-speech-dereverberation-using-optimally-smoothed-spectral-mapping-2007.09131"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-reslstm-with-causal-mean-aggregation-for-device-directed-utterance-detection-2007.09245</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-reslstm-with-causal-mean-aggregation-for-device-directed-utterance-detection-2007.09245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-reslstm-with-causal-mean-aggregation-for-device-directed-utterance-detection-2007.09245"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-with-latent-space-clustering-in-generative-adversarial-network-for-speaker-diarization-2007.09635</loc><lastmod>2020-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-with-latent-space-clustering-in-generative-adversarial-network-for-speaker-diarization-2007.09635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-with-latent-space-clustering-in-generative-adversarial-network-for-speaker-diarization-2007.09635"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-neighbor-embeddings-2007.10329</loc><lastmod>2022-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-neighbor-embeddings-2007.10329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-neighbor-embeddings-2007.10329"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multi-metric-learning-for-text-independent-speaker-verification-2007.10479</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multi-metric-learning-for-text-independent-speaker-verification-2007.10479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multi-metric-learning-for-text-independent-speaker-verification-2007.10479"/></url>
<url><loc>https://scifaro.com/en/abs/cslnspeech-solving-extended-speech-separation-problem-with-the-help-of-chinese-sign-language-2007.10629</loc><lastmod>2023-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cslnspeech-solving-extended-speech-separation-problem-with-the-help-of-chinese-sign-language-2007.10629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cslnspeech-solving-extended-speech-separation-problem-with-the-help-of-chinese-sign-language-2007.10629"/></url>
<url><loc>https://scifaro.com/en/abs/very-fast-keyword-spotting-system-with-real-time-factor-below-0-01-2007.10706</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/very-fast-keyword-spotting-system-with-real-time-factor-below-0-01-2007.10706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/very-fast-keyword-spotting-system-with-real-time-factor-below-0-01-2007.10706"/></url>
<url><loc>https://scifaro.com/en/abs/audio-adversarial-examples-for-robust-hybrid-ctc-attention-speech-recognition-2007.10723</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-adversarial-examples-for-robust-hybrid-ctc-attention-speech-recognition-2007.10723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-adversarial-examples-for-robust-hybrid-ctc-attention-speech-recognition-2007.10723"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-data-driven-filterbank-for-automatic-speaker-verification-2007.10729</loc><lastmod>2020-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-data-driven-filterbank-for-automatic-speaker-verification-2007.10729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-data-driven-filterbank-for-automatic-speaker-verification-2007.10729"/></url>
<url><loc>https://scifaro.com/en/abs/3d-localization-of-a-sound-source-using-mobile-microphone-arrays-referenced-by-slam-2007.11079</loc><lastmod>2020-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-localization-of-a-sound-source-using-mobile-microphone-arrays-referenced-by-slam-2007.11079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-localization-of-a-sound-source-using-mobile-microphone-arrays-referenced-by-slam-2007.11079"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-speech-mask-estimation-for-multi-channel-speech-enhancement-2007.11477</loc><lastmod>2020-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-speech-mask-estimation-for-multi-channel-speech-enhancement-2007.11477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-speech-mask-estimation-for-multi-channel-speech-enhancement-2007.11477"/></url>
<url><loc>https://scifaro.com/en/abs/a-transfer-learning-end-to-end-arabictext-to-speech-tts-deep-architecture-2007.11541</loc><lastmod>2020-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transfer-learning-end-to-end-arabictext-to-speech-tts-deep-architecture-2007.11541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transfer-learning-end-to-end-arabictext-to-speech-tts-deep-architecture-2007.11541"/></url>
<url><loc>https://scifaro.com/en/abs/sequential-routing-framework-fully-capsule-network-based-speech-recognition-2007.11747</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequential-routing-framework-fully-capsule-network-based-speech-recognition-2007.11747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequential-routing-framework-fully-capsule-network-based-speech-recognition-2007.11747"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-translation-and-mixed-source-model-for-virtual-applications-with-perceptual-validation-2007.11795</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-translation-and-mixed-source-model-for-virtual-applications-with-perceptual-validation-2007.11795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-translation-and-mixed-source-model-for-virtual-applications-with-perceptual-validation-2007.11795"/></url>
<url><loc>https://scifaro.com/en/abs/version-control-of-speaker-recognition-systems-2007.12069</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/version-control-of-speaker-recognition-systems-2007.12069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/version-control-of-speaker-recognition-systems-2007.12069"/></url>
<url><loc>https://scifaro.com/en/abs/dereverberation-using-joint-estimation-of-dry-speech-signal-and-acoustic-system-2007.12581</loc><lastmod>2020-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dereverberation-using-joint-estimation-of-dry-speech-signal-and-acoustic-system-2007.12581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dereverberation-using-joint-estimation-of-dry-speech-signal-and-acoustic-system-2007.12581"/></url>
<url><loc>https://scifaro.com/en/abs/mp3-compression-to-diminish-adversarial-noise-in-end-to-end-speech-recognition-2007.12892</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mp3-compression-to-diminish-adversarial-noise-in-end-to-end-speech-recognition-2007.12892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mp3-compression-to-diminish-adversarial-noise-in-end-to-end-speech-recognition-2007.12892"/></url>
<url><loc>https://scifaro.com/en/abs/non-parallel-emotion-conversion-using-a-deep-generative-hybrid-network-and-an-adversarial-pair-discriminator-2007.12932</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-parallel-emotion-conversion-using-a-deep-generative-hybrid-network-and-an-adversarial-pair-discriminator-2007.12932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-parallel-emotion-conversion-using-a-deep-generative-hybrid-network-and-an-adversarial-pair-discriminator-2007.12932"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-emotion-conversion-via-latent-variable-regularization-and-a-chained-encoder-decoder-predictor-network-2007.12937</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-emotion-conversion-via-latent-variable-regularization-and-a-chained-encoder-decoder-predictor-network-2007.12937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-emotion-conversion-via-latent-variable-regularization-and-a-chained-encoder-decoder-predictor-network-2007.12937"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-isa-with-auxiliary-variables-for-learning-speech-representations-2007.12948</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-isa-with-auxiliary-variables-for-learning-speech-representations-2007.12948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-isa-with-auxiliary-variables-for-learning-speech-representations-2007.12948"/></url>
<url><loc>https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-a-non-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.12955</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-a-non-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.12955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quasi-periodic-parallel-wavegan-a-non-autoregressive-raw-waveform-generative-model-with-pitch-dependent-dilated-convolution-neural-network-2007.12955"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-subword-modeling-using-autoregressive-pretraining-and-cross-lingual-phone-aware-modeling-2007.13002</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-subword-modeling-using-autoregressive-pretraining-and-cross-lingual-phone-aware-modeling-2007.13002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-subword-modeling-using-autoregressive-pretraining-and-cross-lingual-phone-aware-modeling-2007.13002"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-deep-hybrid-tensor-to-vector-network-architectures-for-regression-based-speech-enhancement-2007.13024</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-deep-hybrid-tensor-to-vector-network-architectures-for-regression-based-speech-enhancement-2007.13024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-deep-hybrid-tensor-to-vector-network-architectures-for-regression-based-speech-enhancement-2007.13024"/></url>
<url><loc>https://scifaro.com/en/abs/self-expressing-autoencoders-for-unsupervised-spoken-term-discovery-2007.13033</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-expressing-autoencoders-for-unsupervised-spoken-term-discovery-2007.13033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-expressing-autoencoders-for-unsupervised-spoken-term-discovery-2007.13033"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-spoofing-detection-with-raw-waveform-cldnns-2007.13060</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-spoofing-detection-with-raw-waveform-cldnns-2007.13060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-spoofing-detection-with-raw-waveform-cldnns-2007.13060"/></url>
<url><loc>https://scifaro.com/en/abs/uiai-system-for-short-duration-speaker-verification-challenge-2020-2007.13118</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uiai-system-for-short-duration-speaker-verification-challenge-2020-2007.13118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uiai-system-for-short-duration-speaker-verification-challenge-2020-2007.13118"/></url>
<url><loc>https://scifaro.com/en/abs/double-multi-head-attention-for-speaker-verification-2007.13199</loc><lastmod>2021-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/double-multi-head-attention-for-speaker-verification-2007.13199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/double-multi-head-attention-for-speaker-verification-2007.13199"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-audio-fingerprinting-features-for-speech-enhancement-with-generative-adversarial-network-2007.13258</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-audio-fingerprinting-features-for-speech-enhancement-with-generative-adversarial-network-2007.13258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-audio-fingerprinting-features-for-speech-enhancement-with-generative-adversarial-network-2007.13258"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-emotional-content-in-indian-political-speeches-2007.13325</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-emotional-content-in-indian-political-speeches-2007.13325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-emotional-content-in-indian-political-speeches-2007.13325"/></url>
<url><loc>https://scifaro.com/en/abs/self-attentive-multi-layer-aggregation-with-feature-recalibration-and-normalization-for-end-to-end-speaker-verification-system-2007.13350</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attentive-multi-layer-aggregation-with-feature-recalibration-and-normalization-for-end-to-end-speaker-verification-system-2007.13350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attentive-multi-layer-aggregation-with-feature-recalibration-and-normalization-for-end-to-end-speaker-verification-system-2007.13350"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-unsupervised-phoneme-segmentation-2007.13465</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-unsupervised-phoneme-segmentation-2007.13465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-contrastive-learning-for-unsupervised-phoneme-segmentation-2007.13465"/></url>
<url><loc>https://scifaro.com/en/abs/receptive-field-regularized-cnns-for-music-classification-and-tagging-2007.13503</loc><lastmod>2020-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/receptive-field-regularized-cnns-for-music-classification-and-tagging-2007.13503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/receptive-field-regularized-cnns-for-music-classification-and-tagging-2007.13503"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-the-reliability-of-acoustic-speech-embeddings-2007.13542</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-the-reliability-of-acoustic-speech-embeddings-2007.13542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-the-reliability-of-acoustic-speech-embeddings-2007.13542"/></url>
<url><loc>https://scifaro.com/en/abs/from-sound-representation-to-model-robustness-2007.13703</loc><lastmod>2021-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-sound-representation-to-model-robustness-2007.13703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-sound-representation-to-model-robustness-2007.13703"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-minimum-word-error-rate-training-of-rnn-transducer-for-end-to-end-speech-recognition-2007.13802</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-minimum-word-error-rate-training-of-rnn-transducer-for-end-to-end-speech-recognition-2007.13802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-minimum-word-error-rate-training-of-rnn-transducer-for-end-to-end-speech-recognition-2007.13802"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-with-data-augmentation-for-end-to-end-asr-2007.13876</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-with-data-augmentation-for-end-to-end-asr-2007.13876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-with-data-augmentation-for-end-to-end-asr-2007.13876"/></url>
<url><loc>https://scifaro.com/en/abs/neural-kalman-filtering-for-speech-enhancement-2007.13962</loc><lastmod>2021-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-kalman-filtering-for-speech-enhancement-2007.13962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-kalman-filtering-for-speech-enhancement-2007.13962"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-transformer-network-direct-context-aware-modeling-for-end-to-end-monaural-speech-separation-2007.13975</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-transformer-network-direct-context-aware-modeling-for-end-to-end-monaural-speech-separation-2007.13975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-transformer-network-direct-context-aware-modeling-for-end-to-end-monaural-speech-separation-2007.13975"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-x-vector-reconstruction-for-domain-adapted-speaker-recognition-2007.14146</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-x-vector-reconstruction-for-domain-adapted-speaker-recognition-2007.14146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-x-vector-reconstruction-for-domain-adapted-speaker-recognition-2007.14146"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-and-analysing-spontaneous-oral-cancer-speech-in-the-wild-2007.14205</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-and-analysing-spontaneous-oral-cancer-speech-in-the-wild-2007.14205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-and-analysing-spontaneous-oral-cancer-speech-in-the-wild-2007.14205"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-integration-for-large-vocabulary-audio-visual-speech-recognition-2007.14223</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-integration-for-large-vocabulary-audio-visual-speech-recognition-2007.14223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-integration-for-large-vocabulary-audio-visual-speech-recognition-2007.14223"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-approach-to-audio-to-score-alignment-2007.14333</loc><lastmod>2020-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-approach-to-audio-to-score-alignment-2007.14333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-approach-to-audio-to-score-alignment-2007.14333"/></url>
<url><loc>https://scifaro.com/en/abs/autosegmental-neural-nets-should-phones-and-tones-be-synchronous-or-asynchronous-2007.14351</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autosegmental-neural-nets-should-phones-and-tones-be-synchronous-or-asynchronous-2007.14351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autosegmental-neural-nets-should-phones-and-tones-be-synchronous-or-asynchronous-2007.14351"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-keyword-spotting-with-prototypical-networks-2007.14463</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-keyword-spotting-with-prototypical-networks-2007.14463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-keyword-spotting-with-prototypical-networks-2007.14463"/></url>
<url><loc>https://scifaro.com/en/abs/autoclip-adaptive-gradient-clipping-for-source-separation-networks-2007.14469</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoclip-adaptive-gradient-clipping-for-source-separation-networks-2007.14469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoclip-adaptive-gradient-clipping-for-source-separation-networks-2007.14469"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-no-reference-pstn-speech-quality-prediction-2007.14598</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-no-reference-pstn-speech-quality-prediction-2007.14598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-no-reference-pstn-speech-quality-prediction-2007.14598"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-unsupervised-pre-training-for-acoustic-representation-learning-2007.14602</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-unsupervised-pre-training-for-acoustic-representation-learning-2007.14602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-unsupervised-pre-training-for-acoustic-representation-learning-2007.14602"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-adversarial-white-box-attacks-on-music-instrument-classification-2007.14714</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-adversarial-white-box-attacks-on-music-instrument-classification-2007.14714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-adversarial-white-box-attacks-on-music-instrument-classification-2007.14714"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-generative-adversarial-alignment-representation-for-sheet-music-audio-and-lyrics-2007.14856</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-generative-adversarial-alignment-representation-for-sheet-music-audio-and-lyrics-2007.14856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-generative-adversarial-alignment-representation-for-sheet-music-audio-and-lyrics-2007.14856"/></url>
<url><loc>https://scifaro.com/en/abs/on-loss-functions-and-recurrency-training-for-gan-based-speech-enhancement-systems-2007.14974</loc><lastmod>2020-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-loss-functions-and-recurrency-training-for-gan-based-speech-enhancement-systems-2007.14974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-loss-functions-and-recurrency-training-for-gan-based-speech-enhancement-systems-2007.14974"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-phase-distortion-on-perceived-speech-quality-for-hearing-impaired-listeners-2007.14986</loc><lastmod>2020-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-phase-distortion-on-perceived-speech-quality-for-hearing-impaired-listeners-2007.14986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-phase-distortion-on-perceived-speech-quality-for-hearing-impaired-listeners-2007.14986"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-preserving-voice-analysis-via-disentangled-representations-2007.15064</loc><lastmod>2020-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-preserving-voice-analysis-via-disentangled-representations-2007.15064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-preserving-voice-analysis-via-disentangled-representations-2007.15064"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-cross-lingual-knowledge-in-unsupervised-acoustic-modeling-for-low-resource-languages-2007.15074</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-cross-lingual-knowledge-in-unsupervised-acoustic-modeling-for-low-resource-languages-2007.15074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-cross-lingual-knowledge-in-unsupervised-acoustic-modeling-for-low-resource-languages-2007.15074"/></url>
<url><loc>https://scifaro.com/en/abs/developing-rnn-t-models-surpassing-high-performance-hybrid-models-with-customization-capability-2007.15188</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-rnn-t-models-surpassing-high-performance-hybrid-models-with-customization-capability-2007.15188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-rnn-t-models-surpassing-high-performance-hybrid-models-with-customization-capability-2007.15188"/></url>
<url><loc>https://scifaro.com/en/abs/vocgan-a-high-fidelity-real-time-vocoder-with-a-hierarchically-nested-adversarial-network-2007.15256</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocgan-a-high-fidelity-real-time-vocoder-with-a-hierarchically-nested-adversarial-network-2007.15256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocgan-a-high-fidelity-real-time-vocoder-with-a-hierarchically-nested-adversarial-network-2007.15256"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-speed-control-of-end-to-end-speech-synthesis-using-sentence-level-conditioning-2007.15281</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-speed-control-of-end-to-end-speech-synthesis-using-sentence-level-conditioning-2007.15281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-speed-control-of-end-to-end-speech-synthesis-using-sentence-level-conditioning-2007.15281"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-re-assessment-of-feature-extractors-for-deep-speaker-embeddings-2007.15283</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-re-assessment-of-feature-extractors-for-deep-speaker-embeddings-2007.15283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-re-assessment-of-feature-extractors-for-deep-speaker-embeddings-2007.15283"/></url>
<url><loc>https://scifaro.com/en/abs/music-fadernets-controllable-music-generation-based-on-high-level-features-via-low-level-feature-modelling-2007.15474</loc><lastmod>2020-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-fadernets-controllable-music-generation-based-on-high-level-features-via-low-level-feature-modelling-2007.15474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-fadernets-controllable-music-generation-based-on-high-level-features-via-low-level-feature-modelling-2007.15474"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-distrust-towards-the-skills-of-a-virtual-assistant-using-speech-2007.15711</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-distrust-towards-the-skills-of-a-virtual-assistant-using-speech-2007.15711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-distrust-towards-the-skills-of-a-virtual-assistant-using-speech-2007.15711"/></url>
<url><loc>https://scifaro.com/en/abs/a-pyramid-recurrent-network-for-predicting-crowdsourced-speech-quality-ratings-of-real-world-signals-2007.15797</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pyramid-recurrent-network-for-predicting-crowdsourced-speech-quality-ratings-of-real-world-signals-2007.15797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pyramid-recurrent-network-for-predicting-crowdsourced-speech-quality-ratings-of-real-world-signals-2007.15797"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-wise-meeting-transcription-system-using-asynchronous-distributed-microphones-2007.15868</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-wise-meeting-transcription-system-using-asynchronous-distributed-microphones-2007.15868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-wise-meeting-transcription-system-using-asynchronous-distributed-microphones-2007.15868"/></url>
<url><loc>https://scifaro.com/en/abs/designing-neural-speaker-embeddings-with-meta-learning-2007.16196</loc><lastmod>2020-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/designing-neural-speaker-embeddings-with-meta-learning-2007.16196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/designing-neural-speaker-embeddings-with-meta-learning-2007.16196"/></url>
<url><loc>https://scifaro.com/en/abs/an-acoustic-segment-model-based-segment-unit-selection-approach-to-acoustic-scene-classification-with-partial-utterances-2008.00107</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-acoustic-segment-model-based-segment-unit-selection-approach-to-acoustic-scene-classification-with-partial-utterances-2008.00107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-acoustic-segment-model-based-segment-unit-selection-approach-to-acoustic-scene-classification-with-partial-utterances-2008.00107"/></url>
<url><loc>https://scifaro.com/en/abs/relational-teacher-student-learning-with-neural-label-embedding-for-device-adaptation-in-acoustic-scene-classification-2008.00110</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relational-teacher-student-learning-with-neural-label-embedding-for-device-adaptation-in-acoustic-scene-classification-2008.00110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relational-teacher-student-learning-with-neural-label-embedding-for-device-adaptation-in-acoustic-scene-classification-2008.00110"/></url>
<url><loc>https://scifaro.com/en/abs/neural-text-to-speech-with-a-modeling-by-generation-excitation-vocoder-2008.00132</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-text-to-speech-with-a-modeling-by-generation-excitation-vocoder-2008.00132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-text-to-speech-with-a-modeling-by-generation-excitation-vocoder-2008.00132"/></url>
<url><loc>https://scifaro.com/en/abs/singer-identification-using-convolutional-acoustic-motif-embeddings-2008.00198</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singer-identification-using-convolutional-acoustic-motif-embeddings-2008.00198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singer-identification-using-convolutional-acoustic-motif-embeddings-2008.00198"/></url>
<url><loc>https://scifaro.com/en/abs/score-informed-networks-for-music-performance-assessment-2008.00203</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-informed-networks-for-music-performance-assessment-2008.00203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-informed-networks-for-music-performance-assessment-2008.00203"/></url>
<url><loc>https://scifaro.com/en/abs/neural-ode-with-temporal-convolution-and-time-delay-neural-networks-for-small-footprint-keyword-spotting-2008.00209</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-ode-with-temporal-convolution-and-time-delay-neural-networks-for-small-footprint-keyword-spotting-2008.00209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-ode-with-temporal-convolution-and-time-delay-neural-networks-for-small-footprint-keyword-spotting-2008.00209"/></url>
<url><loc>https://scifaro.com/en/abs/dccrn-deep-complex-convolution-recurrent-network-for-phase-aware-speech-enhancement-2008.00264</loc><lastmod>2020-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dccrn-deep-complex-convolution-recurrent-network-for-phase-aware-speech-enhancement-2008.00264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dccrn-deep-complex-convolution-recurrent-network-for-phase-aware-speech-enhancement-2008.00264"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-adaptation-of-spoken-language-identification-for-related-languages-the-curious-case-of-slavic-languages-2008.00545</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-adaptation-of-spoken-language-identification-for-related-languages-the-curious-case-of-slavic-languages-2008.00545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-adaptation-of-spoken-language-identification-for-related-languages-the-curious-case-of-slavic-languages-2008.00545"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-deep-sentential-context-for-expressive-end-to-end-speech-synthesis-2008.00613</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-deep-sentential-context-for-expressive-end-to-end-speech-synthesis-2008.00613"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-deep-sentential-context-for-expressive-end-to-end-speech-synthesis-2008.00613"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-learning-for-instrument-activation-aware-music-source-separation-2008.00616</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-learning-for-instrument-activation-aware-music-source-separation-2008.00616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-learning-for-instrument-activation-aware-music-source-separation-2008.00616"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-speech-synthesis-using-tacotron2-2008.00620</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-speech-synthesis-using-tacotron2-2008.00620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-speech-synthesis-using-tacotron2-2008.00620"/></url>
<url><loc>https://scifaro.com/en/abs/learning-intonation-pattern-embeddings-for-arabic-dialect-identification-2008.00667</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-intonation-pattern-embeddings-for-arabic-dialect-identification-2008.00667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-intonation-pattern-embeddings-for-arabic-dialect-identification-2008.00667"/></url>
<url><loc>https://scifaro.com/en/abs/tutornet-towards-flexible-knowledge-distillation-for-end-to-end-speech-recognition-2008.00671</loc><lastmod>2021-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tutornet-towards-flexible-knowledge-distillation-for-end-to-end-speech-recognition-2008.00671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tutornet-towards-flexible-knowledge-distillation-for-end-to-end-speech-recognition-2008.00671"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-semi-supervised-learning-framework-for-punctuation-prediction-in-conversational-speech-2008.00702</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-semi-supervised-learning-framework-for-punctuation-prediction-in-conversational-speech-2008.00702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-semi-supervised-learning-framework-for-punctuation-prediction-in-conversational-speech-2008.00702"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-discovery-of-recurring-speech-patterns-using-probabilistic-adaptive-metrics-2008.00731</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-discovery-of-recurring-speech-patterns-using-probabilistic-adaptive-metrics-2008.00731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-discovery-of-recurring-speech-patterns-using-probabilistic-adaptive-metrics-2008.00731"/></url>
<url><loc>https://scifaro.com/en/abs/structure-and-automatic-segmentation-of-dhrupad-vocal-bandish-audio-2008.00756</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structure-and-automatic-segmentation-of-dhrupad-vocal-bandish-audio-2008.00756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structure-and-automatic-segmentation-of-dhrupad-vocal-bandish-audio-2008.00756"/></url>
<url><loc>https://scifaro.com/en/abs/one-model-many-languages-meta-learning-for-multilingual-text-to-speech-2008.00768</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-model-many-languages-meta-learning-for-multilingual-text-to-speech-2008.00768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-model-many-languages-meta-learning-for-multilingual-text-to-speech-2008.00768"/></url>
<url><loc>https://scifaro.com/en/abs/musicoder-a-universal-music-acoustic-encoder-based-on-transformers-2008.00781</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicoder-a-universal-music-acoustic-encoder-based-on-transformers-2008.00781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicoder-a-universal-music-acoustic-encoder-based-on-transformers-2008.00781"/></url>
<url><loc>https://scifaro.com/en/abs/evolving-multi-resolution-pooling-cnn-for-monaural-singing-voice-separation-2008.00816</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolving-multi-resolution-pooling-cnn-for-monaural-singing-voice-separation-2008.00816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolving-multi-resolution-pooling-cnn-for-monaural-singing-voice-separation-2008.00816"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-dependent-articulatory-to-acoustic-mapping-using-real-time-mri-of-the-vocal-tract-2008.00889</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-dependent-articulatory-to-acoustic-mapping-using-real-time-mri-of-the-vocal-tract-2008.00889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-dependent-articulatory-to-acoustic-mapping-using-real-time-mri-of-the-vocal-tract-2008.00889"/></url>
<url><loc>https://scifaro.com/en/abs/modular-end-to-end-automatic-speech-recognition-framework-for-acoustic-to-word-model-2008.00953</loc><lastmod>2020-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modular-end-to-end-automatic-speech-recognition-framework-for-acoustic-to-word-model-2008.00953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modular-end-to-end-automatic-speech-recognition-framework-for-acoustic-to-word-model-2008.00953"/></url>
<url><loc>https://scifaro.com/en/abs/self-attention-encoding-and-pooling-for-speaker-recognition-2008.01077</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-attention-encoding-and-pooling-for-speaker-recognition-2008.01077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-attention-encoding-and-pooling-for-speaker-recognition-2008.01077"/></url>
<url><loc>https://scifaro.com/en/abs/a-spectral-energy-distance-for-parallel-speech-synthesis-2008.01160</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-spectral-energy-distance-for-parallel-speech-synthesis-2008.01160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-spectral-energy-distance-for-parallel-speech-synthesis-2008.01160"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-construction-of-asr-systems-with-massive-video-data-2008.01300</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-construction-of-asr-systems-with-massive-video-data-2008.01300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-construction-of-asr-systems-with-massive-video-data-2008.01300"/></url>
<url><loc>https://scifaro.com/en/abs/intra-class-variation-reduction-of-speaker-representation-in-disentanglement-framework-2008.01348</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intra-class-variation-reduction-of-speaker-representation-in-disentanglement-framework-2008.01348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intra-class-variation-reduction-of-speaker-representation-in-disentanglement-framework-2008.01348"/></url>
<url><loc>https://scifaro.com/en/abs/this-is-houston-say-again-please-the-behavox-system-for-the-apollo-11-fearless-steps-challenge-phase-ii-2008.01504</loc><lastmod>2020-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/this-is-houston-say-again-please-the-behavox-system-for-the-apollo-11-fearless-steps-challenge-phase-ii-2008.01504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/this-is-houston-say-again-please-the-behavox-system-for-the-apollo-11-fearless-steps-challenge-phase-ii-2008.01504"/></url>
<url><loc>https://scifaro.com/en/abs/mirnet-learning-multiple-identities-representations-in-overlapped-speech-2008.01698</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mirnet-learning-multiple-identities-representations-in-overlapped-speech-2008.01698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mirnet-learning-multiple-identities-representations-in-overlapped-speech-2008.01698"/></url>
<url><loc>https://scifaro.com/en/abs/future-vector-enhanced-lstm-language-model-for-lvcsr-2008.01832</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/future-vector-enhanced-lstm-language-model-for-lvcsr-2008.01832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/future-vector-enhanced-lstm-language-model-for-lvcsr-2008.01832"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-denoise-historical-music-2008.02027</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-denoise-historical-music-2008.02027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-denoise-historical-music-2008.02027"/></url>
<url><loc>https://scifaro.com/en/abs/content-based-singing-voice-source-separation-via-strong-conditioning-using-aligned-phonemes-2008.02070</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-based-singing-voice-source-separation-via-strong-conditioning-using-aligned-phonemes-2008.02070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-based-singing-voice-source-separation-via-strong-conditioning-using-aligned-phonemes-2008.02070"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-dependent-acoustic-to-articulatory-inversion-using-real-time-mri-of-the-vocal-tract-2008.02098</loc><lastmod>2020-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-dependent-acoustic-to-articulatory-inversion-using-real-time-mri-of-the-vocal-tract-2008.02098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-dependent-acoustic-to-articulatory-inversion-using-real-time-mri-of-the-vocal-tract-2008.02098"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-transformer-ctc-networks-for-hardware-efficient-voice-triggering-2008.02323</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-transformer-ctc-networks-for-hardware-efficient-voice-triggering-2008.02323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-transformer-ctc-networks-for-hardware-efficient-voice-triggering-2008.02323"/></url>
<url><loc>https://scifaro.com/en/abs/recognition-synthesis-based-non-parallel-voice-conversion-with-adversarial-learning-2008.02371</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognition-synthesis-based-non-parallel-voice-conversion-with-adversarial-learning-2008.02371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognition-synthesis-based-non-parallel-voice-conversion-with-adversarial-learning-2008.02371"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-measurement-of-time-invariant-linear-and-nonlinear-and-random-and-extra-responses-using-frequency-domain-variant-of-velvet-noise-2008.02439</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-measurement-of-time-invariant-linear-and-nonlinear-and-random-and-extra-responses-using-frequency-domain-variant-of-velvet-noise-2008.02439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-measurement-of-time-invariant-linear-and-nonlinear-and-random-and-extra-responses-using-frequency-domain-variant-of-velvet-noise-2008.02439"/></url>
<url><loc>https://scifaro.com/en/abs/quantification-of-transducer-misalignment-in-ultrasound-tongue-imaging-2008.02470</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantification-of-transducer-misalignment-in-ultrasound-tongue-imaging-2008.02470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantification-of-transducer-misalignment-in-ultrasound-tongue-imaging-2008.02470"/></url>
<url><loc>https://scifaro.com/en/abs/mixing-specific-data-augmentation-techniques-for-improved-blind-violin-piano-source-separation-2008.02480</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixing-specific-data-augmentation-techniques-for-improved-blind-violin-piano-source-separation-2008.02480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixing-specific-data-augmentation-techniques-for-improved-blind-violin-piano-source-separation-2008.02480"/></url>
<url><loc>https://scifaro.com/en/abs/shouted-speech-compensation-for-speaker-verification-robust-to-vocal-effort-conditions-2008.02487</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shouted-speech-compensation-for-speaker-verification-robust-to-vocal-effort-conditions-2008.02487"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shouted-speech-compensation-for-speaker-verification-robust-to-vocal-effort-conditions-2008.02487"/></url>
<url><loc>https://scifaro.com/en/abs/ppspeech-phrase-based-parallel-end-to-end-tts-system-2008.02490</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ppspeech-phrase-based-parallel-end-to-end-tts-system-2008.02490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ppspeech-phrase-based-parallel-end-to-end-tts-system-2008.02490"/></url>
<url><loc>https://scifaro.com/en/abs/hooligan-robust-high-quality-neural-vocoding-2008.02493</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hooligan-robust-high-quality-neural-vocoding-2008.02493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hooligan-robust-high-quality-neural-vocoding-2008.02493"/></url>
<url><loc>https://scifaro.com/en/abs/fastlr-non-autoregressive-lipreading-model-with-integrate-and-fire-2008.02516</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastlr-non-autoregressive-lipreading-model-with-integrate-and-fire-2008.02516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastlr-non-autoregressive-lipreading-model-with-integrate-and-fire-2008.02516"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-change-enhancement-with-prior-snr-for-the-hearing-impaired-2008.02519</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-change-enhancement-with-prior-snr-for-the-hearing-impaired-2008.02519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-change-enhancement-with-prior-snr-for-the-hearing-impaired-2008.02519"/></url>
<url><loc>https://scifaro.com/en/abs/data-balancing-for-boosting-performance-of-low-frequency-classes-in-spoken-language-understanding-2008.02603</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-balancing-for-boosting-performance-of-low-frequency-classes-in-spoken-language-understanding-2008.02603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-balancing-for-boosting-performance-of-low-frequency-classes-in-spoken-language-understanding-2008.02603"/></url>
<url><loc>https://scifaro.com/en/abs/improving-on-device-speaker-verification-using-federated-learning-with-privacy-2008.02651</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-on-device-speaker-verification-using-federated-learning-with-privacy-2008.02651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-on-device-speaker-verification-using-federated-learning-with-privacy-2008.02651"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-fusion-enhanced-audio-visual-encoding-for-transformer-based-robust-speech-recognition-2008.02686</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-fusion-enhanced-audio-visual-encoding-for-transformer-based-robust-speech-recognition-2008.02686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-fusion-enhanced-audio-visual-encoding-for-transformer-based-robust-speech-recognition-2008.02686"/></url>
<url><loc>https://scifaro.com/en/abs/aalto-s-end-to-end-dnn-systems-for-the-interspeech-2020-computational-paralinguistics-challenge-2008.02689</loc><lastmod>2020-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aalto-s-end-to-end-dnn-systems-for-the-interspeech-2020-computational-paralinguistics-challenge-2008.02689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aalto-s-end-to-end-dnn-systems-for-the-interspeech-2020-computational-paralinguistics-challenge-2008.02689"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-cross-domain-singing-voice-conversion-2008.02830</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-cross-domain-singing-voice-conversion-2008.02830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-cross-domain-singing-voice-conversion-2008.02830"/></url>
<url><loc>https://scifaro.com/en/abs/a-transfer-learning-method-for-speech-emotion-recognition-from-automatic-speech-recognition-2008.02863</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transfer-learning-method-for-speech-emotion-recognition-from-automatic-speech-recognition-2008.02863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transfer-learning-method-for-speech-emotion-recognition-from-automatic-speech-recognition-2008.02863"/></url>
<url><loc>https://scifaro.com/en/abs/respiratory-sound-classification-using-long-short-term-memory-2008.02900</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/respiratory-sound-classification-using-long-short-term-memory-2008.02900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/respiratory-sound-classification-using-long-short-term-memory-2008.02900"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-text-to-speech-synthesis-using-deep-gaussian-processes-2008.02950</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-text-to-speech-synthesis-using-deep-gaussian-processes-2008.02950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-text-to-speech-synthesis-using-deep-gaussian-processes-2008.02950"/></url>
<url><loc>https://scifaro.com/en/abs/durian-sc-duration-informed-attention-network-based-singing-voice-conversion-system-2008.03009</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/durian-sc-duration-informed-attention-network-based-singing-voice-conversion-system-2008.03009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/durian-sc-duration-informed-attention-network-based-singing-voice-conversion-system-2008.03009"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-speaker-and-nuisance-attribute-embedding-for-robust-speaker-verification-2008.03024</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-speaker-and-nuisance-attribute-embedding-for-robust-speaker-verification-2008.03024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-speaker-and-nuisance-attribute-embedding-for-robust-speaker-verification-2008.03024"/></url>
<url><loc>https://scifaro.com/en/abs/peking-opera-synthesis-via-duration-informed-attention-network-2008.03029</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peking-opera-synthesis-via-duration-informed-attention-network-2008.03029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peking-opera-synthesis-via-duration-informed-attention-network-2008.03029"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-techniques-for-sequence-to-sequence-voice-conversion-2008.03088</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-techniques-for-sequence-to-sequence-voice-conversion-2008.03088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-techniques-for-sequence-to-sequence-voice-conversion-2008.03088"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-text-to-speech-for-neural-sequence-to-sequence-models-using-reinforcement-learning-2008.03096</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-text-to-speech-for-neural-sequence-to-sequence-models-using-reinforcement-learning-2008.03096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-text-to-speech-for-neural-sequence-to-sequence-models-using-reinforcement-learning-2008.03096"/></url>
<url><loc>https://scifaro.com/en/abs/a-machine-of-few-words-interactive-speaker-recognition-with-reinforcement-learning-2008.03127</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-machine-of-few-words-interactive-speaker-recognition-with-reinforcement-learning-2008.03127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-machine-of-few-words-interactive-speaker-recognition-with-reinforcement-learning-2008.03127"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-based-on-multi-stage-elaborated-dual-path-deep-bilstm-with-auxiliary-identity-loss-2008.03149</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-based-on-multi-stage-elaborated-dual-path-deep-bilstm-with-auxiliary-identity-loss-2008.03149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-based-on-multi-stage-elaborated-dual-path-deep-bilstm-with-auxiliary-identity-loss-2008.03149"/></url>
<url><loc>https://scifaro.com/en/abs/ultrasound-based-articulatory-to-acoustic-mapping-with-waveglow-speech-synthesis-2008.03152</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultrasound-based-articulatory-to-acoustic-mapping-with-waveglow-speech-synthesis-2008.03152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultrasound-based-articulatory-to-acoustic-mapping-with-waveglow-speech-synthesis-2008.03152"/></url>
<url><loc>https://scifaro.com/en/abs/applying-speech-tempo-derived-features-boaw-and-fisher-vectors-to-detect-elderly-emotion-and-speech-in-surgical-masks-2008.03183</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-speech-tempo-derived-features-boaw-and-fisher-vectors-to-detect-elderly-emotion-and-speech-in-surgical-masks-2008.03183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-speech-tempo-derived-features-boaw-and-fisher-vectors-to-detect-elderly-emotion-and-speech-in-surgical-masks-2008.03183"/></url>
<url><loc>https://scifaro.com/en/abs/cuchild-a-large-scale-cantonese-corpus-of-child-speech-for-phonology-and-articulation-assessment-2008.03188</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuchild-a-large-scale-cantonese-corpus-of-child-speech-for-phonology-and-articulation-assessment-2008.03188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuchild-a-large-scale-cantonese-corpus-of-child-speech-for-phonology-and-articulation-assessment-2008.03188"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-phonological-errors-in-child-speech-using-siamese-recurrent-autoencoder-2008.03193</loc><lastmod>2020-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-phonological-errors-in-child-speech-using-siamese-recurrent-autoencoder-2008.03193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-phonological-errors-in-child-speech-using-siamese-recurrent-autoencoder-2008.03193"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-speaker-adaptation-methods-in-transformer-based-asr-2008.03247</loc><lastmod>2021-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-speaker-adaptation-methods-in-transformer-based-asr-2008.03247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-speaker-adaptation-methods-in-transformer-based-asr-2008.03247"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-dereverberation-of-temporal-envelopesfor-robust-speech-recognition-2008.03339</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-dereverberation-of-temporal-envelopesfor-robust-speech-recognition-2008.03339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-dereverberation-of-temporal-envelopesfor-robust-speech-recognition-2008.03339"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-framework-for-audio-tagging-and-weakly-supervised-acoustic-event-detection-using-densenet-with-global-average-pooling-2008.03350</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-framework-for-audio-tagging-and-weakly-supervised-acoustic-event-detection-using-densenet-with-global-average-pooling-2008.03350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-framework-for-audio-tagging-and-weakly-supervised-acoustic-event-detection-using-densenet-with-global-average-pooling-2008.03350"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-approach-to-accent-recognition-and-conversion-for-mandarin-chinese-2008.03359</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-approach-to-accent-recognition-and-conversion-for-mandarin-chinese-2008.03359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-approach-to-accent-recognition-and-conversion-for-mandarin-chinese-2008.03359"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-huntington-disease-using-acoustic-and-lexical-features-2008.03367</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-huntington-disease-using-acoustic-and-lexical-features-2008.03367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-huntington-disease-using-acoustic-and-lexical-features-2008.03367"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-neural-prosody-synthesis-2008.03388</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-neural-prosody-synthesis-2008.03388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-neural-prosody-synthesis-2008.03388"/></url>
<url><loc>https://scifaro.com/en/abs/word-error-rate-estimation-without-asr-output-e-wer2-2008.03403</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/word-error-rate-estimation-without-asr-output-e-wer2-2008.03403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/word-error-rate-estimation-without-asr-output-e-wer2-2008.03403"/></url>
<url><loc>https://scifaro.com/en/abs/stacked-1d-convolutional-networks-for-end-to-end-small-footprint-voice-trigger-detection-2008.03405</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stacked-1d-convolutional-networks-for-end-to-end-small-footprint-voice-trigger-detection-2008.03405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stacked-1d-convolutional-networks-for-end-to-end-small-footprint-voice-trigger-detection-2008.03405"/></url>
<url><loc>https://scifaro.com/en/abs/deep-f-measure-maximization-for-end-to-end-speech-understanding-2008.03425</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-f-measure-maximization-for-end-to-end-speech-understanding-2008.03425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-f-measure-maximization-for-end-to-end-speech-understanding-2008.03425"/></url>
<url><loc>https://scifaro.com/en/abs/audio-spoofing-verification-using-deep-convolutional-neural-networks-by-transfer-learning-2008.03464</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-spoofing-verification-using-deep-convolutional-neural-networks-by-transfer-learning-2008.03464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-spoofing-verification-using-deep-convolutional-neural-networks-by-transfer-learning-2008.03464"/></url>
<url><loc>https://scifaro.com/en/abs/jukebox-a-multilingual-singer-recognition-dataset-2008.03507</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jukebox-a-multilingual-singer-recognition-dataset-2008.03507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jukebox-a-multilingual-singer-recognition-dataset-2008.03507"/></url>
<url><loc>https://scifaro.com/en/abs/context-dependent-rnnlm-for-automatic-transcription-of-conversations-2008.03517</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-dependent-rnnlm-for-automatic-transcription-of-conversations-2008.03517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-dependent-rnnlm-for-automatic-transcription-of-conversations-2008.03517"/></url>
<url><loc>https://scifaro.com/en/abs/npu-speaker-verification-system-for-interspeech-2020-far-field-speaker-verification-challenge-2008.03521</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/npu-speaker-verification-system-for-interspeech-2020-far-field-speaker-verification-challenge-2008.03521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/npu-speaker-verification-system-for-interspeech-2020-far-field-speaker-verification-challenge-2008.03521"/></url>
<url><loc>https://scifaro.com/en/abs/extrapolating-false-alarm-rates-in-automatic-speaker-verification-2008.03590</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extrapolating-false-alarm-rates-in-automatic-speaker-verification-2008.03590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extrapolating-false-alarm-rates-in-automatic-speaker-verification-2008.03590"/></url>
<url><loc>https://scifaro.com/en/abs/speech-driven-talking-face-generation-from-a-single-image-and-an-emotion-condition-2008.03592</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-driven-talking-face-generation-from-a-single-image-and-an-emotion-condition-2008.03592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-driven-talking-face-generation-from-a-single-image-and-an-emotion-condition-2008.03592"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-use-of-an-unsupervised-autoregressive-model-as-a-shared-encoder-for-text-dependent-speaker-verification-2008.03615</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-use-of-an-unsupervised-autoregressive-model-as-a-shared-encoder-for-text-dependent-speaker-verification-2008.03615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-use-of-an-unsupervised-autoregressive-model-as-a-shared-encoder-for-text-dependent-speaker-verification-2008.03615"/></url>
<url><loc>https://scifaro.com/en/abs/variable-frame-rate-based-data-augmentation-to-handle-speaking-style-variability-for-automatic-speaker-verification-2008.03616</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variable-frame-rate-based-data-augmentation-to-handle-speaking-style-variability-for-automatic-speaker-verification-2008.03616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variable-frame-rate-based-data-augmentation-to-handle-speaking-style-variability-for-automatic-speaker-verification-2008.03616"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-discrimination-in-humans-and-machines-effects-of-speaking-style-variability-2008.03617</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-discrimination-in-humans-and-machines-effects-of-speaking-style-variability-2008.03617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-discrimination-in-humans-and-machines-effects-of-speaking-style-variability-2008.03617"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-voice-conversion-and-its-challenges-from-statistical-modeling-to-deep-learning-2008.03648</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-voice-conversion-and-its-challenges-from-statistical-modeling-to-deep-learning-2008.03648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-voice-conversion-and-its-challenges-from-statistical-modeling-to-deep-learning-2008.03648"/></url>
<url><loc>https://scifaro.com/en/abs/lrspeech-extremely-low-resource-speech-synthesis-and-recognition-2008.03687</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lrspeech-extremely-low-resource-speech-synthesis-and-recognition-2008.03687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lrspeech-extremely-low-resource-speech-synthesis-and-recognition-2008.03687"/></url>
<url><loc>https://scifaro.com/en/abs/deep-mos-predictor-for-synthetic-speech-using-cluster-based-modeling-2008.03710</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-mos-predictor-for-synthetic-speech-using-cluster-based-modeling-2008.03710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-mos-predictor-for-synthetic-speech-using-cluster-based-modeling-2008.03710"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-multidimensional-metric-learning-for-music-similarity-2008.03720</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-multidimensional-metric-learning-for-music-similarity-2008.03720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-multidimensional-metric-learning-for-music-similarity-2008.03720"/></url>
<url><loc>https://scifaro.com/en/abs/cosine-distance-virtual-adversarial-training-for-semi-supervised-speaker-discriminative-acoustic-embeddings-2008.03756</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosine-distance-virtual-adversarial-training-for-semi-supervised-speaker-discriminative-acoustic-embeddings-2008.03756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosine-distance-virtual-adversarial-training-for-semi-supervised-speaker-discriminative-acoustic-embeddings-2008.03756"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-detection-of-wake-word-start-and-end-using-a-cnn-2008.03790</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-detection-of-wake-word-start-and-end-using-a-cnn-2008.03790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-detection-of-wake-word-start-and-end-using-a-cnn-2008.03790"/></url>
<url><loc>https://scifaro.com/en/abs/speedyspeech-efficient-neural-speech-synthesis-2008.03802</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speedyspeech-efficient-neural-speech-synthesis-2008.03802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speedyspeech-efficient-neural-speech-synthesis-2008.03802"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speaker-recognition-with-a-cross-modal-discriminative-network-2008.03894</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speaker-recognition-with-a-cross-modal-discriminative-network-2008.03894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speaker-recognition-with-a-cross-modal-discriminative-network-2008.03894"/></url>
<url><loc>https://scifaro.com/en/abs/improving-partition-block-based-acoustic-echo-canceler-in-under-modeling-scenarios-2008.03944</loc><lastmod>2025-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-partition-block-based-acoustic-echo-canceler-in-under-modeling-scenarios-2008.03944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-partition-block-based-acoustic-echo-canceler-in-under-modeling-scenarios-2008.03944"/></url>
<url><loc>https://scifaro.com/en/abs/deep-self-supervised-hierarchical-clustering-for-speaker-diarization-2008.03960</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-self-supervised-hierarchical-clustering-for-speaker-diarization-2008.03960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-self-supervised-hierarchical-clustering-for-speaker-diarization-2008.03960"/></url>
<url><loc>https://scifaro.com/en/abs/vaw-gan-for-singing-voice-conversion-with-non-parallel-training-data-2008.03992</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vaw-gan-for-singing-voice-conversion-with-non-parallel-training-data-2008.03992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vaw-gan-for-singing-voice-conversion-with-non-parallel-training-data-2008.03992"/></url>
<url><loc>https://scifaro.com/en/abs/subword-regularization-an-analysis-of-scalability-and-generalization-for-end-to-end-automatic-speech-recognition-2008.04034</loc><lastmod>2020-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subword-regularization-an-analysis-of-scalability-and-generalization-for-end-to-end-automatic-speech-recognition-2008.04034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subword-regularization-an-analysis-of-scalability-and-generalization-for-end-to-end-automatic-speech-recognition-2008.04034"/></url>
<url><loc>https://scifaro.com/en/abs/phonological-features-for-0-shot-multilingual-speech-synthesis-2008.04107</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonological-features-for-0-shot-multilingual-speech-synthesis-2008.04107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonological-features-for-0-shot-multilingual-speech-synthesis-2008.04107"/></url>
<url><loc>https://scifaro.com/en/abs/tinyspeech-attention-condensers-for-deep-speech-recognition-neural-networks-on-edge-devices-2008.04245</loc><lastmod>2020-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tinyspeech-attention-condensers-for-deep-speech-recognition-neural-networks-on-edge-devices-2008.04245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tinyspeech-attention-condensers-for-deep-speech-recognition-neural-networks-on-edge-devices-2008.04245"/></url>
<url><loc>https://scifaro.com/en/abs/a-perceptually-motivated-approach-for-low-complexity-real-time-enhancement-of-fullband-speech-2008.04259</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-perceptually-motivated-approach-for-low-complexity-real-time-enhancement-of-fullband-speech-2008.04259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-perceptually-motivated-approach-for-low-complexity-real-time-enhancement-of-fullband-speech-2008.04259"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-voice-cloning-from-noisy-samples-with-domain-adversarial-training-2008.04265</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-voice-cloning-from-noisy-samples-with-domain-adversarial-training-2008.04265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-voice-cloning-from-noisy-samples-with-domain-adversarial-training-2008.04265"/></url>
<url><loc>https://scifaro.com/en/abs/poconet-better-speech-enhancement-with-frequency-positional-embeddings-semi-supervised-conversational-data-and-biased-loss-2008.04470</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/poconet-better-speech-enhancement-with-frequency-positional-embeddings-semi-supervised-conversational-data-and-biased-loss-2008.04470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/poconet-better-speech-enhancement-with-frequency-positional-embeddings-semi-supervised-conversational-data-and-biased-loss-2008.04470"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-with-bidirectional-decoder-for-speech-recognition-2008.04481</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-with-bidirectional-decoder-for-speech-recognition-2008.04481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-with-bidirectional-decoder-for-speech-recognition-2008.04481"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-aligned-lyrics-informed-singing-voice-separation-2008.04482</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-aligned-lyrics-informed-singing-voice-separation-2008.04482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-aligned-lyrics-informed-singing-voice-separation-2008.04482"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-effects-of-medical-cloth-and-transparent-face-masks-on-speech-signals-2008.04521</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-effects-of-medical-cloth-and-transparent-face-masks-on-speech-signals-2008.04521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-effects-of-medical-cloth-and-transparent-face-masks-on-speech-signals-2008.04521"/></url>
<url><loc>https://scifaro.com/en/abs/neural-plda-modeling-for-end-to-end-speaker-verification-2008.04527</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-plda-modeling-for-end-to-end-speaker-verification-2008.04527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-plda-modeling-for-end-to-end-speaker-verification-2008.04527"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-end-to-end-speaker-attributed-asr-for-continuous-multi-talker-recordings-2008.04546</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-end-to-end-speaker-attributed-asr-for-continuous-multi-talker-recordings-2008.04546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-end-to-end-speaker-attributed-asr-for-continuous-multi-talker-recordings-2008.04546"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-learning-for-sequence-to-sequence-text-to-speech-for-low-resource-languages-2008.04549</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-learning-for-sequence-to-sequence-text-to-speech-for-low-resource-languages-2008.04549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-learning-for-sequence-to-sequence-text-to-speech-for-low-resource-languages-2008.04549"/></url>
<url><loc>https://scifaro.com/en/abs/spectrum-and-prosody-conversion-for-cross-lingual-voice-conversion-with-cyclegan-2008.04562</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectrum-and-prosody-conversion-for-cross-lingual-voice-conversion-with-cyclegan-2008.04562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectrum-and-prosody-conversion-for-cross-lingual-voice-conversion-with-cyclegan-2008.04562"/></url>
<url><loc>https://scifaro.com/en/abs/bunched-lpcnet-vocoder-for-low-cost-neural-text-to-speech-systems-2008.04574</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bunched-lpcnet-vocoder-for-low-cost-neural-text-to-speech-systems-2008.04574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bunched-lpcnet-vocoder-for-low-cost-neural-text-to-speech-systems-2008.04574"/></url>
<url><loc>https://scifaro.com/en/abs/why-did-the-x-vector-system-miss-a-target-speaker-impact-of-acoustic-mismatch-upon-target-score-on-voxceleb-data-2008.04578</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-did-the-x-vector-system-miss-a-target-speaker-impact-of-acoustic-mismatch-upon-target-score-on-voxceleb-data-2008.04578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-did-the-x-vector-system-miss-a-target-speaker-impact-of-acoustic-mismatch-upon-target-score-on-voxceleb-data-2008.04578"/></url>
<url><loc>https://scifaro.com/en/abs/surgical-mask-detection-with-convolutional-neural-networks-and-data-augmentations-on-spectrograms-2008.04590</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surgical-mask-detection-with-convolutional-neural-networks-and-data-augmentations-on-spectrograms-2008.04590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surgical-mask-detection-with-convolutional-neural-networks-and-data-augmentations-on-spectrograms-2008.04590"/></url>
<url><loc>https://scifaro.com/en/abs/alzheimer-s-dementia-detection-from-audio-and-text-modalities-2008.04617</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alzheimer-s-dementia-detection-from-audio-and-text-modalities-2008.04617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alzheimer-s-dementia-detection-from-audio-and-text-modalities-2008.04617"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-for-improving-singing-voice-detection-in-polyphonic-instrumental-music-2008.04658</loc><lastmod>2020-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-for-improving-singing-voice-detection-in-polyphonic-instrumental-music-2008.04658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-for-improving-singing-voice-detection-in-polyphonic-instrumental-music-2008.04658"/></url>
<url><loc>https://scifaro.com/en/abs/s-vectors-and-tesa-speaker-embeddings-and-a-speaker-authenticator-based-on-transformer-encoder-2008.04659</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s-vectors-and-tesa-speaker-embeddings-and-a-speaker-authenticator-based-on-transformer-encoder-2008.04659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s-vectors-and-tesa-speaker-embeddings-and-a-speaker-authenticator-based-on-transformer-encoder-2008.04659"/></url>
<url><loc>https://scifaro.com/en/abs/compact-speaker-embedding-lrx-vector-2008.05011</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compact-speaker-embedding-lrx-vector-2008.05011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compact-speaker-embedding-lrx-vector-2008.05011"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-approaches-for-streaming-end-to-end-speech-recognition-system-2008.05086</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-approaches-for-streaming-end-to-end-speech-recognition-system-2008.05086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-approaches-for-streaming-end-to-end-speech-recognition-system-2008.05086"/></url>
<url><loc>https://scifaro.com/en/abs/mask-detection-and-breath-monitoring-from-speech-on-data-augmentation-feature-representation-and-modeling-2008.05175</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-detection-and-breath-monitoring-from-speech-on-data-augmentation-feature-representation-and-modeling-2008.05175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-detection-and-breath-monitoring-from-speech-on-data-augmentation-feature-representation-and-modeling-2008.05175"/></url>
<url><loc>https://scifaro.com/en/abs/channel-wise-subband-input-for-better-voice-and-accompaniment-separation-on-high-resolution-music-2008.05216</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-wise-subband-input-for-better-voice-and-accompaniment-separation-on-high-resolution-music-2008.05216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-wise-subband-input-for-better-voice-and-accompaniment-separation-on-high-resolution-music-2008.05216"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-profile-refinery-for-speech-emotion-classification-2008.05259</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-profile-refinery-for-speech-emotion-classification-2008.05259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-profile-refinery-for-speech-emotion-classification-2008.05259"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-prosodic-phrasing-with-multi-task-learning-in-tacotron-based-tts-2008.05284</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-prosodic-phrasing-with-multi-task-learning-in-tacotron-based-tts-2008.05284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-prosodic-phrasing-with-multi-task-learning-in-tacotron-based-tts-2008.05284"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditional-wavernn-towards-universal-neural-vocoder-for-unseen-speaker-and-recording-conditions-2008.05289</loc><lastmod>2020-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditional-wavernn-towards-universal-neural-vocoder-for-unseen-speaker-and-recording-conditions-2008.05289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditional-wavernn-towards-universal-neural-vocoder-for-unseen-speaker-and-recording-conditions-2008.05289"/></url>
<url><loc>https://scifaro.com/en/abs/online-automatic-speech-recognition-with-listen-attend-and-spell-model-2008.05514</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-automatic-speech-recognition-with-listen-attend-and-spell-model-2008.05514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-automatic-speech-recognition-with-listen-attend-and-spell-model-2008.05514"/></url>
<url><loc>https://scifaro.com/en/abs/mlnet-an-adaptive-multiple-receptive-field-attention-neural-network-for-voice-activity-detection-2008.05650</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mlnet-an-adaptive-multiple-receptive-field-attention-neural-network-for-voice-activity-detection-2008.05650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mlnet-an-adaptive-multiple-receptive-field-attention-neural-network-for-voice-activity-detection-2008.05650"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-learning-mechanism-for-speech-synthesis-system-without-text-length-limit-2008.05656</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-learning-mechanism-for-speech-synthesis-system-without-text-length-limit-2008.05656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-learning-mechanism-for-speech-synthesis-system-without-text-length-limit-2008.05656"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-transfer-learning-for-low-resource-spoken-language-understanding-2008.05671</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-transfer-learning-for-low-resource-spoken-language-understanding-2008.05671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-transfer-learning-for-low-resource-spoken-language-understanding-2008.05671"/></url>
<url><loc>https://scifaro.com/en/abs/evolutionary-algorithm-enhanced-neural-architecture-search-for-text-independent-speaker-verification-2008.05695</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolutionary-algorithm-enhanced-neural-architecture-search-for-text-independent-speaker-verification-2008.05695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolutionary-algorithm-enhanced-neural-architecture-search-for-text-independent-speaker-verification-2008.05695"/></url>
<url><loc>https://scifaro.com/en/abs/conv-transformer-transducer-low-latency-low-frame-rate-streamable-end-to-end-speech-recognition-2008.05750</loc><lastmod>2020-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conv-transformer-transducer-low-latency-low-frame-rate-streamable-end-to-end-speech-recognition-2008.05750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conv-transformer-transducer-low-latency-low-frame-rate-streamable-end-to-end-speech-recognition-2008.05750"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-separation-with-conformer-2008.05773</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-separation-with-conformer-2008.05773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-separation-with-conformer-2008.05773"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-quality-assessment-for-audio-visual-verification-systems-the-love-submission-to-nist-sre-challenge-2019-2008.05889</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-quality-assessment-for-audio-visual-verification-systems-the-love-submission-to-nist-sre-challenge-2019-2008.05889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-quality-assessment-for-audio-visual-verification-systems-the-love-submission-to-nist-sre-challenge-2019-2008.05889"/></url>
<url><loc>https://scifaro.com/en/abs/cross-attentive-pooling-for-speaker-verification-2008.05983</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-attentive-pooling-for-speaker-verification-2008.05983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-attentive-pooling-for-speaker-verification-2008.05983"/></url>
<url><loc>https://scifaro.com/en/abs/textual-echo-cancellation-2008.06006</loc><lastmod>2021-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/textual-echo-cancellation-2008.06006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/textual-echo-cancellation-2008.06006"/></url>
<url><loc>https://scifaro.com/en/abs/lstm-acoustic-models-learn-to-align-and-pronounce-with-graphemes-2008.06121</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lstm-acoustic-models-learn-to-align-and-pronounce-with-graphemes-2008.06121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lstm-acoustic-models-learn-to-align-and-pronounce-with-graphemes-2008.06121"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-trainable-self-attentive-shallow-network-for-text-independent-speaker-verification-2008.06146</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-trainable-self-attentive-shallow-network-for-text-independent-speaker-verification-2008.06146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-trainable-self-attentive-shallow-network-for-text-independent-speaker-verification-2008.06146"/></url>
<url><loc>https://scifaro.com/en/abs/online-speaker-adaptation-for-wavenet-based-neural-vocoders-2008.06182</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-speaker-adaptation-for-wavenet-based-neural-vocoders-2008.06182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-speaker-adaptation-for-wavenet-based-neural-vocoders-2008.06182"/></url>
<url><loc>https://scifaro.com/en/abs/adaptable-multi-domain-language-model-for-transformer-asr-2008.06208</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptable-multi-domain-language-model-for-transformer-asr-2008.06208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptable-multi-domain-language-model-for-transformer-asr-2008.06208"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-label-noise-on-a-music-tagger-2008.06273</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-label-noise-on-a-music-tagger-2008.06273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-label-noise-on-a-music-tagger-2008.06273"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-using-teacher-student-models-for-vocal-melody-extraction-2008.06358</loc><lastmod>2020-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-using-teacher-student-models-for-vocal-melody-extraction-2008.06358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-using-teacher-student-models-for-vocal-melody-extraction-2008.06358"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-and-loss-normalization-for-deep-noise-suppression-2008.06412</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-and-loss-normalization-for-deep-noise-suppression-2008.06412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-and-loss-normalization-for-deep-noise-suppression-2008.06412"/></url>
<url><loc>https://scifaro.com/en/abs/adaptation-algorithms-for-neural-network-based-speech-recognition-an-overview-2008.06580</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptation-algorithms-for-neural-network-based-speech-recognition-an-overview-2008.06580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptation-algorithms-for-neural-network-based-speech-recognition-an-overview-2008.06580"/></url>
<url><loc>https://scifaro.com/en/abs/eigenemo-spectral-utterance-representation-using-dynamic-mode-decomposition-for-speech-emotion-classification-2008.06665</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eigenemo-spectral-utterance-representation-using-dynamic-mode-decomposition-for-speech-emotion-classification-2008.06665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eigenemo-spectral-utterance-representation-using-dynamic-mode-decomposition-for-speech-emotion-classification-2008.06665"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-multiple-instance-learning-with-attention-modeling-for-categorical-speech-emotion-recognition-2008.06667</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-multiple-instance-learning-with-attention-modeling-for-categorical-speech-emotion-recognition-2008.06667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-multiple-instance-learning-with-attention-modeling-for-categorical-speech-emotion-recognition-2008.06667"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-fine-tuning-bert-like-self-supervised-models-to-improve-multimodal-speech-emotion-recognition-2008.06682</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-fine-tuning-bert-like-self-supervised-models-to-improve-multimodal-speech-emotion-recognition-2008.06682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-fine-tuning-bert-like-self-supervised-models-to-improve-multimodal-speech-emotion-recognition-2008.06682"/></url>
<url><loc>https://scifaro.com/en/abs/experimental-investigations-of-psychoacoustic-characteristics-of-household-vacuum-cleaners-2008.06702</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experimental-investigations-of-psychoacoustic-characteristics-of-household-vacuum-cleaners-2008.06702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experimental-investigations-of-psychoacoustic-characteristics-of-household-vacuum-cleaners-2008.06702"/></url>
<url><loc>https://scifaro.com/en/abs/fearless-steps-challenge-fs-2-supervised-learning-with-massive-naturalistic-apollo-data-2008.06764</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fearless-steps-challenge-fs-2-supervised-learning-with-massive-naturalistic-apollo-data-2008.06764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fearless-steps-challenge-fs-2-supervised-learning-with-massive-naturalistic-apollo-data-2008.06764"/></url>
<url><loc>https://scifaro.com/en/abs/audio-dequantization-for-high-fidelity-audio-generation-in-flow-based-neural-vocoder-2008.06867</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-dequantization-for-high-fidelity-audio-generation-in-flow-based-neural-vocoder-2008.06867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-dequantization-for-high-fidelity-audio-generation-in-flow-based-neural-vocoder-2008.06867"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-acoustic-unit-representation-learning-for-voice-conversion-using-wavenet-auto-encoders-2008.06892</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-acoustic-unit-representation-learning-for-voice-conversion-using-wavenet-auto-encoders-2008.06892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-acoustic-unit-representation-learning-for-voice-conversion-using-wavenet-auto-encoders-2008.06892"/></url>
<url><loc>https://scifaro.com/en/abs/adl-mvdr-all-deep-learning-mvdr-beamformer-for-target-speech-separation-2008.06994</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adl-mvdr-all-deep-learning-mvdr-beamformer-for-target-speech-separation-2008.06994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adl-mvdr-all-deep-learning-mvdr-beamformer-for-target-speech-separation-2008.06994"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-fully-convolutional-network-and-visualization-techniques-on-spontaneous-speech-for-dementia-detection-2008.07052</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-fully-convolutional-network-and-visualization-techniques-on-spontaneous-speech-for-dementia-detection-2008.07052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-fully-convolutional-network-and-visualization-techniques-on-spontaneous-speech-for-dementia-detection-2008.07052"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-for-interpretable-weakly-labelled-sound-event-detection-2008.07085</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-for-interpretable-weakly-labelled-sound-event-detection-2008.07085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-for-interpretable-weakly-labelled-sound-event-detection-2008.07085"/></url>
<url><loc>https://scifaro.com/en/abs/pianotree-vae-structured-representation-learning-for-polyphonic-music-2008.07118</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pianotree-vae-structured-representation-learning-for-polyphonic-music-2008.07118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pianotree-vae-structured-representation-learning-for-polyphonic-music-2008.07118"/></url>
<url><loc>https://scifaro.com/en/abs/deep-variational-generative-models-for-audio-visual-speech-separation-2008.07191</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-variational-generative-models-for-audio-visual-speech-separation-2008.07191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-variational-generative-models-for-audio-visual-speech-separation-2008.07191"/></url>
<url><loc>https://scifaro.com/en/abs/storir-stochastic-room-impulse-response-generation-for-audio-data-augmentation-2008.07231</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/storir-stochastic-room-impulse-response-generation-for-audio-data-augmentation-2008.07231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/storir-stochastic-room-impulse-response-generation-for-audio-data-augmentation-2008.07231"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-low-latency-speech-enhancement-with-mobile-audio-streaming-networks-2008.07244</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-low-latency-speech-enhancement-with-mobile-audio-streaming-networks-2008.07244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-low-latency-speech-enhancement-with-mobile-audio-streaming-networks-2008.07244"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-open-set-acoustic-scene-classification-2008.07247</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-open-set-acoustic-scene-classification-2008.07247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-open-set-acoustic-scene-classification-2008.07247"/></url>
<url><loc>https://scifaro.com/en/abs/on-mean-absolute-error-for-deep-neural-network-based-vector-to-vector-regression-2008.07281</loc><lastmod>2020-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-mean-absolute-error-for-deep-neural-network-based-vector-to-vector-regression-2008.07281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-mean-absolute-error-for-deep-neural-network-based-vector-to-vector-regression-2008.07281"/></url>
<url><loc>https://scifaro.com/en/abs/do-face-masks-introduce-bias-in-speech-technologies-the-case-of-automated-scoring-of-speaking-proficiency-2008.07520</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-face-masks-introduce-bias-in-speech-technologies-the-case-of-automated-scoring-of-speaking-proficiency-2008.07520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-face-masks-introduce-bias-in-speech-technologies-the-case-of-automated-scoring-of-speaking-proficiency-2008.07520"/></url>
<url><loc>https://scifaro.com/en/abs/music-boundary-detection-using-convolutional-neural-networks-a-comparative-analysis-of-combined-input-features-2008.07527</loc><lastmod>2021-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-boundary-detection-using-convolutional-neural-networks-a-comparative-analysis-of-combined-input-features-2008.07527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-boundary-detection-using-convolutional-neural-networks-a-comparative-analysis-of-combined-input-features-2008.07527"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-broad-phonetic-information-for-speech-enhancement-2008.07618</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-broad-phonetic-information-for-speech-enhancement-2008.07618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-broad-phonetic-information-for-speech-enhancement-2008.07618"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-using-eeg-signals-recorded-using-dry-electrodes-2008.07621</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-using-eeg-signals-recorded-using-dry-electrodes-2008.07621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-using-eeg-signals-recorded-using-dry-electrodes-2008.07621"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-source-separation-applied-to-choir-ensembles-2008.07645</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-source-separation-applied-to-choir-ensembles-2008.07645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-source-separation-applied-to-choir-ensembles-2008.07645"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attack-and-defense-strategies-for-deep-speaker-recognition-systems-2008.07685</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attack-and-defense-strategies-for-deep-speaker-recognition-systems-2008.07685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attack-and-defense-strategies-for-deep-speaker-recognition-systems-2008.07685"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-robot-based-auxiliary-system-for-risk-evaluation-of-covid-19-infection-2008.07695</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-robot-based-auxiliary-system-for-risk-evaluation-of-covid-19-infection-2008.07695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-robot-based-auxiliary-system-for-risk-evaluation-of-covid-19-infection-2008.07695"/></url>
<url><loc>https://scifaro.com/en/abs/tdcgan-temporal-dilated-convolutional-generative-adversarial-network-for-end-to-end-speech-enhancement-2008.07787</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tdcgan-temporal-dilated-convolutional-generative-adversarial-network-for-end-to-end-speech-enhancement-2008.07787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tdcgan-temporal-dilated-convolutional-generative-adversarial-network-for-end-to-end-speech-enhancement-2008.07787"/></url>
<url><loc>https://scifaro.com/en/abs/cinc-gan-for-effective-f0-prediction-for-whisper-to-normal-speech-conversion-2008.07788</loc><lastmod>2020-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cinc-gan-for-effective-f0-prediction-for-whisper-to-normal-speech-conversion-2008.07788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cinc-gan-for-effective-f0-prediction-for-whisper-to-normal-speech-conversion-2008.07788"/></url>
<url><loc>https://scifaro.com/en/abs/complementary-language-model-and-parallel-bi-lrnn-for-false-trigger-mitigation-2008.08113</loc><lastmod>2020-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complementary-language-model-and-parallel-bi-lrnn-for-false-trigger-mitigation-2008.08113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complementary-language-model-and-parallel-bi-lrnn-for-false-trigger-mitigation-2008.08113"/></url>
<url><loc>https://scifaro.com/en/abs/hprnet-incorporating-residual-noise-modeling-for-violin-in-a-variational-parametric-synthesizer-2008.08405</loc><lastmod>2020-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hprnet-incorporating-residual-noise-modeling-for-violin-in-a-variational-parametric-synthesizer-2008.08405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hprnet-incorporating-residual-noise-modeling-for-violin-in-a-variational-parametric-synthesizer-2008.08405"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-goodness-of-pronunciation-for-computer-assisted-pronunciation-training-2008.08647</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-goodness-of-pronunciation-for-computer-assisted-pronunciation-training-2008.08647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-goodness-of-pronunciation-for-computer-assisted-pronunciation-training-2008.08647"/></url>
<url><loc>https://scifaro.com/en/abs/a-generalized-framework-for-domain-adaptation-of-plda-in-speaker-recognition-2008.08815</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generalized-framework-for-domain-adaptation-of-plda-in-speaker-recognition-2008.08815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generalized-framework-for-domain-adaptation-of-plda-in-speaker-recognition-2008.08815"/></url>
<url><loc>https://scifaro.com/en/abs/using-multi-resolution-feature-maps-with-convolutional-neural-networks-for-anti-spoofing-in-asv-2008.08865</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-multi-resolution-feature-maps-with-convolutional-neural-networks-for-anti-spoofing-in-asv-2008.08865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-multi-resolution-feature-maps-with-convolutional-neural-networks-for-anti-spoofing-in-asv-2008.08865"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-utterance-dual-attention-for-speaker-and-utterance-verification-2008.08901</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-utterance-dual-attention-for-speaker-and-utterance-verification-2008.08901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-utterance-dual-attention-for-speaker-and-utterance-verification-2008.08901"/></url>
<url><loc>https://scifaro.com/en/abs/generating-music-with-a-self-correcting-non-chronological-autoregressive-model-2008.08927</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-music-with-a-self-correcting-non-chronological-autoregressive-model-2008.08927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-music-with-a-self-correcting-non-chronological-autoregressive-model-2008.08927"/></url>
<url><loc>https://scifaro.com/en/abs/asya-mindful-verbal-communication-using-deep-learning-2008.08965</loc><lastmod>2020-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asya-mindful-verbal-communication-using-deep-learning-2008.08965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asya-mindful-verbal-communication-using-deep-learning-2008.08965"/></url>
<url><loc>https://scifaro.com/en/abs/blind-mask-to-improve-intelligibility-of-non-stationary-noisy-speech-2008.09175</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-mask-to-improve-intelligibility-of-non-stationary-noisy-speech-2008.09175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-mask-to-improve-intelligibility-of-non-stationary-noisy-speech-2008.09175"/></url>
<url><loc>https://scifaro.com/en/abs/dyadic-speech-based-affect-recognition-using-dami-p2c-parent-child-multimodal-interaction-dataset-2008.09207</loc><lastmod>2020-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dyadic-speech-based-affect-recognition-using-dami-p2c-parent-child-multimodal-interaction-dataset-2008.09207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dyadic-speech-based-affect-recognition-using-dami-p2c-parent-child-multimodal-interaction-dataset-2008.09207"/></url>
<url><loc>https://scifaro.com/en/abs/citisen-a-deep-learning-based-speech-signal-processing-mobile-application-2008.09264</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/citisen-a-deep-learning-based-speech-signal-processing-mobile-application-2008.09264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/citisen-a-deep-learning-based-speech-signal-processing-mobile-application-2008.09264"/></url>
<url><loc>https://scifaro.com/en/abs/laughter-synthesis-combining-seq2seq-modeling-with-transfer-learning-2008.09483</loc><lastmod>2020-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/laughter-synthesis-combining-seq2seq-modeling-with-transfer-learning-2008.09483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/laughter-synthesis-combining-seq2seq-modeling-with-transfer-learning-2008.09483"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-deep-learning-based-audio-visual-speech-enhancement-and-separation-2008.09586</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-deep-learning-based-audio-visual-speech-enhancement-and-separation-2008.09586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-deep-learning-based-audio-visual-speech-enhancement-and-separation-2008.09586"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-neural-speech-synthesis-for-low-resource-languages-through-multilingual-modeling-2008.09659</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-neural-speech-synthesis-for-low-resource-languages-through-multilingual-modeling-2008.09659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-neural-speech-synthesis-for-low-resource-languages-through-multilingual-modeling-2008.09659"/></url>
<url><loc>https://scifaro.com/en/abs/they-are-wearing-a-mask-identification-of-subjects-wearing-a-surgical-mask-from-their-speech-by-means-of-x-vectors-and-fisher-vectors-2008.10014</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/they-are-wearing-a-mask-identification-of-subjects-wearing-a-surgical-mask-from-their-speech-by-means-of-x-vectors-and-fisher-vectors-2008.10014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/they-are-wearing-a-mask-identification-of-subjects-wearing-a-surgical-mask-from-their-speech-by-means-of-x-vectors-and-fisher-vectors-2008.10014"/></url>
<url><loc>https://scifaro.com/en/abs/amrconvnet-amr-coded-speech-enhancement-using-convolutional-neural-networks-2008.10233</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amrconvnet-amr-coded-speech-enhancement-using-convolutional-neural-networks-2008.10233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amrconvnet-amr-coded-speech-enhancement-using-convolutional-neural-networks-2008.10233"/></url>
<url><loc>https://scifaro.com/en/abs/a-computational-analysis-of-real-world-dj-mixes-using-mix-to-track-subsequence-alignment-2008.10267</loc><lastmod>2020-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computational-analysis-of-real-world-dj-mixes-using-mix-to-track-subsequence-alignment-2008.10267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computational-analysis-of-real-world-dj-mixes-using-mix-to-track-subsequence-alignment-2008.10267"/></url>
<url><loc>https://scifaro.com/en/abs/improving-tail-performance-of-a-deliberation-e2e-asr-model-using-a-large-text-corpus-2008.10491</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-tail-performance-of-a-deliberation-e2e-asr-model-using-a-large-text-corpus-2008.10491"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-tail-performance-of-a-deliberation-e2e-asr-model-using-a-large-text-corpus-2008.10491"/></url>
<url><loc>https://scifaro.com/en/abs/aphasic-speech-recognition-using-a-mixture-of-speech-intelligibility-experts-2008.10788</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aphasic-speech-recognition-using-a-mixture-of-speech-intelligibility-experts-2008.10788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aphasic-speech-recognition-using-a-mixture-of-speech-intelligibility-experts-2008.10788"/></url>
<url><loc>https://scifaro.com/en/abs/ice-talk-an-interface-for-a-controllable-expressive-talking-machine-2008.11045</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ice-talk-an-interface-for-a-controllable-expressive-talking-machine-2008.11045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ice-talk-an-interface-for-a-controllable-expressive-talking-machine-2008.11045"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-text-independent-speaker-verification-using-3d-cnn-2008.11088</loc><lastmod>2020-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-text-independent-speaker-verification-using-3d-cnn-2008.11088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-text-independent-speaker-verification-using-3d-cnn-2008.11088"/></url>
<url><loc>https://scifaro.com/en/abs/independent-vector-analysis-with-deep-neural-network-source-priors-2008.11273</loc><lastmod>2020-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-vector-analysis-with-deep-neural-network-source-priors-2008.11273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-vector-analysis-with-deep-neural-network-source-priors-2008.11273"/></url>
<url><loc>https://scifaro.com/en/abs/the-freesound-loop-dataset-and-annotation-tool-2008.11507</loc><lastmod>2020-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-freesound-loop-dataset-and-annotation-tool-2008.11507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-freesound-loop-dataset-and-annotation-tool-2008.11507"/></url>
<url><loc>https://scifaro.com/en/abs/tiv-lib-an-open-source-library-for-the-tonal-description-of-musical-audio-2008.11529</loc><lastmod>2020-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiv-lib-an-open-source-library-for-the-tonal-description-of-musical-audio-2008.11529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiv-lib-an-open-source-library-for-the-tonal-description-of-musical-audio-2008.11529"/></url>
<url><loc>https://scifaro.com/en/abs/learned-transferable-architectures-can-surpass-hand-designed-architectures-for-large-scale-speech-recognition-2008.11589</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learned-transferable-architectures-can-surpass-hand-designed-architectures-for-large-scale-speech-recognition-2008.11589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learned-transferable-architectures-can-surpass-hand-designed-architectures-for-large-scale-speech-recognition-2008.11589"/></url>
<url><loc>https://scifaro.com/en/abs/adversarially-training-for-audio-classifiers-2008.11618</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarially-training-for-audio-classifiers-2008.11618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarially-training-for-audio-classifiers-2008.11618"/></url>
<url><loc>https://scifaro.com/en/abs/deepvox-discovering-features-from-raw-audio-for-speaker-recognition-in-non-ideal-audio-signals-2008.11668</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepvox-discovering-features-from-raw-audio-for-speaker-recognition-in-non-ideal-audio-signals-2008.11668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepvox-discovering-features-from-raw-audio-for-speaker-recognition-in-non-ideal-audio-signals-2008.11668"/></url>
<url><loc>https://scifaro.com/en/abs/fcn-approach-for-dynamically-locating-multiple-speakers-2008.11845</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fcn-approach-for-dynamically-locating-multiple-speakers-2008.11845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fcn-approach-for-dynamically-locating-multiple-speakers-2008.11845"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-noise-embedding-noise-aware-training-and-adaptation-for-speech-enhancement-2008.11920</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-noise-embedding-noise-aware-training-and-adaptation-for-speech-enhancement-2008.11920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-noise-embedding-noise-aware-training-and-adaptation-for-speech-enhancement-2008.11920"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-uniqueness-of-i-vector-representation-of-human-voice-2008.11985</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-uniqueness-of-i-vector-representation-of-human-voice-2008.11985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-uniqueness-of-i-vector-representation-of-human-voice-2008.11985"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-music-mixed-speech-recognition-2008.12048</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-music-mixed-speech-recognition-2008.12048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-music-mixed-speech-recognition-2008.12048"/></url>
<url><loc>https://scifaro.com/en/abs/drumgan-synthesis-of-drum-sounds-with-timbral-feature-conditioning-using-generative-adversarial-networks-2008.12073</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drumgan-synthesis-of-drum-sounds-with-timbral-feature-conditioning-using-generative-adversarial-networks-2008.12073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drumgan-synthesis-of-drum-sounds-with-timbral-feature-conditioning-using-generative-adversarial-networks-2008.12073"/></url>
<url><loc>https://scifaro.com/en/abs/length-and-noise-aware-training-techniques-for-short-utterance-speaker-recognition-2008.12218</loc><lastmod>2020-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/length-and-noise-aware-training-techniques-for-short-utterance-speaker-recognition-2008.12218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/length-and-noise-aware-training-techniques-for-short-utterance-speaker-recognition-2008.12218"/></url>
<url><loc>https://scifaro.com/en/abs/listener-position-and-orientation-dependency-of-auditory-perception-in-an-enclosed-space-elicitation-of-salient-attributes-2008.12255</loc><lastmod>2020-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listener-position-and-orientation-dependency-of-auditory-perception-in-an-enclosed-space-elicitation-of-salient-attributes-2008.12255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listener-position-and-orientation-dependency-of-auditory-perception-in-an-enclosed-space-elicitation-of-salient-attributes-2008.12255"/></url>
<url><loc>https://scifaro.com/en/abs/speech-sentiment-and-customer-satisfaction-estimation-in-socialbot-conversations-2008.12376</loc><lastmod>2020-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-sentiment-and-customer-satisfaction-estimation-in-socialbot-conversations-2008.12376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-sentiment-and-customer-satisfaction-estimation-in-socialbot-conversations-2008.12376"/></url>
<url><loc>https://scifaro.com/en/abs/text-conditioned-transformer-for-automatic-pronunciation-error-detection-2008.12424</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-conditioned-transformer-for-automatic-pronunciation-error-detection-2008.12424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-conditioned-transformer-for-automatic-pronunciation-error-detection-2008.12424"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-challenge-2020-intra-lingual-semi-parallel-and-cross-lingual-voice-conversion-2008.12527</loc><lastmod>2020-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-challenge-2020-intra-lingual-semi-parallel-and-cross-lingual-voice-conversion-2008.12527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-challenge-2020-intra-lingual-semi-parallel-and-cross-lingual-voice-conversion-2008.12527"/></url>
<url><loc>https://scifaro.com/en/abs/nonparallel-voice-conversion-with-augmented-classifier-star-generative-adversarial-networks-2008.12604</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonparallel-voice-conversion-with-augmented-classifier-star-generative-adversarial-networks-2008.12604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonparallel-voice-conversion-with-augmented-classifier-star-generative-adversarial-networks-2008.12604"/></url>
<url><loc>https://scifaro.com/en/abs/source-aware-neural-speech-coding-for-noisy-speech-compression-2008.12889</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-aware-neural-speech-coding-for-noisy-speech-compression-2008.12889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-aware-neural-speech-coding-for-noisy-speech-compression-2008.12889"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-using-prosody-and-false-starts-to-recognize-non-native-children-s-speech-2008.12914</loc><lastmod>2020-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-using-prosody-and-false-starts-to-recognize-non-native-children-s-speech-2008.12914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-using-prosody-and-false-starts-to-recognize-non-native-children-s-speech-2008.12914"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-rescoring-with-transformer-for-streaming-on-device-speech-recognition-2008.13093</loc><lastmod>2020-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-rescoring-with-transformer-for-streaming-on-device-speech-recognition-2008.13093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-rescoring-with-transformer-for-streaming-on-device-speech-recognition-2008.13093"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-timbre-painting-and-articulation-generation-2008.13095</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-timbre-painting-and-articulation-generation-2008.13095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-timbre-painting-and-articulation-generation-2008.13095"/></url>
<url><loc>https://scifaro.com/en/abs/speech-pseudonymisation-assessment-using-voice-similarity-matrices-2008.13144</loc><lastmod>2020-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-pseudonymisation-assessment-using-voice-similarity-matrices-2008.13144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-pseudonymisation-assessment-using-voice-similarity-matrices-2008.13144"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-of-speaker-type-pldas-for-children-s-speech-diarization-2008.13213</loc><lastmod>2020-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-of-speaker-type-pldas-for-children-s-speech-diarization-2008.13213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-of-speaker-type-pldas-for-children-s-speech-diarization-2008.13213"/></url>
<url><loc>https://scifaro.com/en/abs/improved-lite-audio-visual-speech-enhancement-2008.13222</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-lite-audio-visual-speech-enhancement-2008.13222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-lite-audio-visual-speech-enhancement-2008.13222"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-for-keyword-spotting-2009.00165</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-for-keyword-spotting-2009.00165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-for-keyword-spotting-2009.00165"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-memory-in-lstm-rnns-for-source-separation-2009.00551</loc><lastmod>2020-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-memory-in-lstm-rnns-for-source-separation-2009.00551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-memory-in-lstm-rnns-for-source-separation-2009.00551"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-inductive-transfer-learning-for-detection-of-alzheimer-s-dementia-and-its-severity-2009.00700</loc><lastmod>2020-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-inductive-transfer-learning-for-detection-of-alzheimer-s-dementia-and-its-severity-2009.00700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-inductive-transfer-learning-for-detection-of-alzheimer-s-dementia-and-its-severity-2009.00700"/></url>
<url><loc>https://scifaro.com/en/abs/wavegrad-estimating-gradients-for-waveform-generation-2009.00713</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavegrad-estimating-gradients-for-waveform-generation-2009.00713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavegrad-estimating-gradients-for-waveform-generation-2009.00713"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-representation-learning-using-global-context-guided-channel-and-time-frequency-transformations-2009.00768</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-representation-learning-using-global-context-guided-channel-and-time-frequency-transformations-2009.00768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-representation-learning-using-global-context-guided-channel-and-time-frequency-transformations-2009.00768"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-parkinson-s-disease-from-an-online-speech-task-2009.01231</loc><lastmod>2020-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-parkinson-s-disease-from-an-online-speech-task-2009.01231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-parkinson-s-disease-from-an-online-speech-task-2009.01231"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-speech-recognition-with-pitch-and-voice-quality-features-2009.01309</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-speech-recognition-with-pitch-and-voice-quality-features-2009.01309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-speech-recognition-with-pitch-and-voice-quality-features-2009.01309"/></url>
<url><loc>https://scifaro.com/en/abs/sagrnn-self-attentive-gated-rnn-for-binaural-speaker-separation-with-interaural-cue-preservation-2009.01381</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sagrnn-self-attentive-gated-rnn-for-binaural-speaker-separation-with-interaural-cue-preservation-2009.01381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sagrnn-self-attentive-gated-rnn-for-binaural-speaker-separation-with-interaural-cue-preservation-2009.01381"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-by-cascading-automatic-speech-recognition-and-text-to-speech-synthesis-with-prosody-transfer-2009.01475</loc><lastmod>2020-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-by-cascading-automatic-speech-recognition-and-text-to-speech-synthesis-with-prosody-transfer-2009.01475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-by-cascading-automatic-speech-recognition-and-text-to-speech-synthesis-with-prosody-transfer-2009.01475"/></url>
<url><loc>https://scifaro.com/en/abs/intra-utterance-similarity-preserving-knowledge-distillation-for-audio-tagging-2009.01759</loc><lastmod>2020-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intra-utterance-similarity-preserving-knowledge-distillation-for-audio-tagging-2009.01759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intra-utterance-similarity-preserving-knowledge-distillation-for-audio-tagging-2009.01759"/></url>
<url><loc>https://scifaro.com/en/abs/hifisinger-towards-high-fidelity-neural-singing-voice-synthesis-2009.01776</loc><lastmod>2020-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifisinger-towards-high-fidelity-neural-singing-voice-synthesis-2009.01776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifisinger-towards-high-fidelity-neural-singing-voice-synthesis-2009.01776"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-representation-learning-2009.01822</loc><lastmod>2023-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-representation-learning-2009.01822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-early-frequency-attention-for-deep-speaker-representation-learning-2009.01822"/></url>
<url><loc>https://scifaro.com/en/abs/dense-cnn-with-self-attention-for-time-domain-speech-enhancement-2009.01941</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dense-cnn-with-self-attention-for-time-domain-speech-enhancement-2009.01941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dense-cnn-with-self-attention-for-time-domain-speech-enhancement-2009.01941"/></url>
<url><loc>https://scifaro.com/en/abs/what-the-future-brings-investigating-the-impact-of-lookahead-for-incremental-neural-tts-2009.02035</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-the-future-brings-investigating-the-impact-of-lookahead-for-incremental-neural-tts-2009.02035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-the-future-brings-investigating-the-impact-of-lookahead-for-incremental-neural-tts-2009.02035"/></url>
<url><loc>https://scifaro.com/en/abs/seanet-a-multi-modal-speech-enhancement-network-2009.02095</loc><lastmod>2020-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seanet-a-multi-modal-speech-enhancement-network-2009.02095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seanet-a-multi-modal-speech-enhancement-network-2009.02095"/></url>
<url><loc>https://scifaro.com/en/abs/silent-speech-interfaces-for-speech-restoration-a-review-2009.02110</loc><lastmod>2020-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/silent-speech-interfaces-for-speech-restoration-a-review-2009.02110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/silent-speech-interfaces-for-speech-restoration-a-review-2009.02110"/></url>
<url><loc>https://scifaro.com/en/abs/degradation-effects-of-water-immersion-on-earbud-audio-quality-2009.02151</loc><lastmod>2020-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/degradation-effects-of-water-immersion-on-earbud-audio-quality-2009.02151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/degradation-effects-of-water-immersion-on-earbud-audio-quality-2009.02151"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-adaptation-with-discrepancy-minimization-for-text-independent-forensic-speaker-verification-2009.02444</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-adaptation-with-discrepancy-minimization-for-text-independent-forensic-speaker-verification-2009.02444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-adaptation-with-discrepancy-minimization-for-text-independent-forensic-speaker-verification-2009.02444"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-view-approach-for-mandarin-non-native-mispronunciation-verification-2009.02573</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-view-approach-for-mandarin-non-native-mispronunciation-verification-2009.02573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-view-approach-for-mandarin-non-native-mispronunciation-verification-2009.02573"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-multi-modal-emotion-recognition-with-cross-modal-distribution-matching-2009.02598</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-multi-modal-emotion-recognition-with-cross-modal-distribution-matching-2009.02598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-multi-modal-emotion-recognition-with-cross-modal-distribution-matching-2009.02598"/></url>
<url><loc>https://scifaro.com/en/abs/any-to-many-voice-conversion-with-location-relative-sequence-to-sequence-modeling-2009.02725</loc><lastmod>2021-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/any-to-many-voice-conversion-with-location-relative-sequence-to-sequence-modeling-2009.02725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/any-to-many-voice-conversion-with-location-relative-sequence-to-sequence-modeling-2009.02725"/></url>
<url><loc>https://scifaro.com/en/abs/overview-and-evaluation-of-sound-event-localization-and-detection-in-dcase-2019-2009.02792</loc><lastmod>2021-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-and-evaluation-of-sound-event-localization-and-detection-in-dcase-2019-2009.02792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-and-evaluation-of-sound-event-localization-and-detection-in-dcase-2019-2009.02792"/></url>
<url><loc>https://scifaro.com/en/abs/libri-adapt-a-new-speech-dataset-for-unsupervised-domain-adaptation-2009.02814</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libri-adapt-a-new-speech-dataset-for-unsupervised-domain-adaptation-2009.02814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libri-adapt-a-new-speech-dataset-for-unsupervised-domain-adaptation-2009.02814"/></url>
<url><loc>https://scifaro.com/en/abs/non-causal-deep-learning-based-dereverberation-2009.02832</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-causal-deep-learning-based-dereverberation-2009.02832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-causal-deep-learning-based-dereverberation-2009.02832"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-virtual-analog-modelling-techniques-for-desktop-and-embedded-implementations-2009.02833</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-virtual-analog-modelling-techniques-for-desktop-and-embedded-implementations-2009.02833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-virtual-analog-modelling-techniques-for-desktop-and-embedded-implementations-2009.02833"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-single-ended-objective-quality-measures-for-time-scale-modified-audio-2009.02940</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-single-ended-objective-quality-measures-for-time-scale-modified-audio-2009.02940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-single-ended-objective-quality-measures-for-time-scale-modified-audio-2009.02940"/></url>
<url><loc>https://scifaro.com/en/abs/kospeech-open-source-toolkit-for-end-to-end-korean-speech-recognition-2009.03092</loc><lastmod>2020-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kospeech-open-source-toolkit-for-end-to-end-korean-speech-recognition-2009.03092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kospeech-open-source-toolkit-for-end-to-end-korean-speech-recognition-2009.03092"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-architecture-of-online-multi-channel-speech-separation-2009.03141</loc><lastmod>2020-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-architecture-of-online-multi-channel-speech-separation-2009.03141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-architecture-of-online-multi-channel-speech-separation-2009.03141"/></url>
<url><loc>https://scifaro.com/en/abs/predictions-of-subjective-ratings-and-spoofing-assessments-of-voice-conversion-challenge-2020-submissions-2009.03554</loc><lastmod>2020-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predictions-of-subjective-ratings-and-spoofing-assessments-of-voice-conversion-challenge-2020-submissions-2009.03554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predictions-of-subjective-ratings-and-spoofing-assessments-of-voice-conversion-challenge-2020-submissions-2009.03554"/></url>
<url><loc>https://scifaro.com/en/abs/autokws-keyword-spotting-with-differentiable-architecture-search-2009.03658</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autokws-keyword-spotting-with-differentiable-architecture-search-2009.03658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autokws-keyword-spotting-with-differentiable-architecture-search-2009.03658"/></url>
<url><loc>https://scifaro.com/en/abs/toward-speech-separation-in-the-pre-cocktail-party-problem-with-tastas-2009.03692</loc><lastmod>2023-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-speech-separation-in-the-pre-cocktail-party-problem-with-tastas-2009.03692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-speech-separation-in-the-pre-cocktail-party-problem-with-tastas-2009.03692"/></url>
<url><loc>https://scifaro.com/en/abs/1-dimensional-polynomial-neural-networks-for-audio-signal-related-problems-2009.04077</loc><lastmod>2022-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1-dimensional-polynomial-neural-networks-for-audio-signal-related-problems-2009.04077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1-dimensional-polynomial-neural-networks-for-audio-signal-related-problems-2009.04077"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-attention-for-speech-emotion-recognition-2009.04107</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-attention-for-speech-emotion-recognition-2009.04107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-attention-for-speech-emotion-recognition-2009.04107"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-f0-estimation-in-vocal-ensembles-using-convolutional-neural-networks-2009.04172</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-f0-estimation-in-vocal-ensembles-using-convolutional-neural-networks-2009.04172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-f0-estimation-in-vocal-ensembles-using-convolutional-neural-networks-2009.04172"/></url>
<url><loc>https://scifaro.com/en/abs/voicefilter-lite-streaming-targeted-voice-separation-for-on-device-speech-recognition-2009.04323</loc><lastmod>2020-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicefilter-lite-streaming-targeted-voice-separation-for-on-device-speech-recognition-2009.04323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicefilter-lite-streaming-targeted-voice-separation-for-on-device-speech-recognition-2009.04323"/></url>
<url><loc>https://scifaro.com/en/abs/hardware-aware-training-for-efficient-keyword-spotting-on-general-purpose-and-specialized-hardware-2009.04465</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hardware-aware-training-for-efficient-keyword-spotting-on-general-purpose-and-specialized-hardware-2009.04465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hardware-aware-training-for-efficient-keyword-spotting-on-general-purpose-and-specialized-hardware-2009.04465"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2021-acoustic-echo-cancellation-challenge-datasets-testing-framework-and-results-2009.04972</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2021-acoustic-echo-cancellation-challenge-datasets-testing-framework-and-results-2009.04972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2021-acoustic-echo-cancellation-challenge-datasets-testing-framework-and-results-2009.04972"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-end-to-end-synthesisers-forzero-resource-speech-challenge-2020-2009.04983</loc><lastmod>2020-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-end-to-end-synthesisers-forzero-resource-speech-challenge-2020-2009.04983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-end-to-end-synthesisers-forzero-resource-speech-challenge-2020-2009.04983"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-clustering-using-stereo-audio-channels-2009.05076</loc><lastmod>2021-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-clustering-using-stereo-audio-channels-2009.05076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-clustering-using-stereo-audio-channels-2009.05076"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-minimal-distortion-principle-for-blind-source-separation-2009.05288</loc><lastmod>2020-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-minimal-distortion-principle-for-blind-source-separation-2009.05288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-minimal-distortion-principle-for-blind-source-separation-2009.05288"/></url>
<url><loc>https://scifaro.com/en/abs/text-independent-speaker-verification-with-dual-attention-network-2009.05485</loc><lastmod>2020-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-independent-speaker-verification-with-dual-attention-network-2009.05485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-independent-speaker-verification-with-dual-attention-network-2009.05485"/></url>
<url><loc>https://scifaro.com/en/abs/recoapy-data-recording-pre-processing-and-phonetic-transcription-for-end-to-end-speech-based-applications-2009.05493</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recoapy-data-recording-pre-processing-and-phonetic-transcription-for-end-to-end-speech-based-applications-2009.05493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recoapy-data-recording-pre-processing-and-phonetic-transcription-for-end-to-end-speech-based-applications-2009.05493"/></url>
<url><loc>https://scifaro.com/en/abs/on-multitask-loss-function-for-audio-event-detection-and-localization-2009.05527</loc><lastmod>2020-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-multitask-loss-function-for-audio-event-detection-and-localization-2009.05527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-multitask-loss-function-for-audio-event-detection-and-localization-2009.05527"/></url>
<url><loc>https://scifaro.com/en/abs/visual-speech-synthesis-of-exaggerated-corrective-feedback-2009.05748</loc><lastmod>2020-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-speech-synthesis-of-exaggerated-corrective-feedback-2009.05748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-speech-synthesis-of-exaggerated-corrective-feedback-2009.05748"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-2009.06122</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-2009.06122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2021-deep-noise-suppression-challenge-2009.06122"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-neural-text-to-speech-synthesis-using-intuitive-prosodic-features-2009.06775</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-neural-text-to-speech-synthesis-using-intuitive-prosodic-features-2009.06775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-neural-text-to-speech-synthesis-using-intuitive-prosodic-features-2009.06775"/></url>
<url><loc>https://scifaro.com/en/abs/when-automatic-voice-disguise-meets-automatic-speaker-verification-2009.06863</loc><lastmod>2020-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/when-automatic-voice-disguise-meets-automatic-speaker-verification-2009.06863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/when-automatic-voice-disguise-meets-automatic-speaker-verification-2009.06863"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-intent-recognition-from-keywords-2009.08064</loc><lastmod>2020-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-intent-recognition-from-keywords-2009.08064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-intent-recognition-from-keywords-2009.08064"/></url>
<url><loc>https://scifaro.com/en/abs/online-speaker-diarization-with-relation-network-2009.08162</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-speaker-diarization-with-relation-network-2009.08162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-speaker-diarization-with-relation-network-2009.08162"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-multi-grained-generative-model-for-expressive-speech-synthesis-2009.08474</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-multi-grained-generative-model-for-expressive-speech-synthesis-2009.08474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-multi-grained-generative-model-for-expressive-speech-synthesis-2009.08474"/></url>
<url><loc>https://scifaro.com/en/abs/x-dc-explainable-deep-clustering-based-on-learnable-spectrogram-templates-2009.08661</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-dc-explainable-deep-clustering-based-on-learnable-spectrogram-templates-2009.08661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-dc-explainable-deep-clustering-based-on-learnable-spectrogram-templates-2009.08661"/></url>
<url><loc>https://scifaro.com/en/abs/far-field-automatic-speech-recognition-2009.09395</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/far-field-automatic-speech-recognition-2009.09395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/far-field-automatic-speech-recognition-2009.09395"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-auxiliary-function-based-independent-vector-analysis-2009.09402</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-auxiliary-function-based-independent-vector-analysis-2009.09402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-auxiliary-function-based-independent-vector-analysis-2009.09402"/></url>
<url><loc>https://scifaro.com/en/abs/open-set-short-utterance-forensic-speaker-verification-using-teacher-student-network-with-explicit-inductive-bias-2009.09556</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-set-short-utterance-forensic-speaker-verification-using-teacher-student-network-with-explicit-inductive-bias-2009.09556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-set-short-utterance-forensic-speaker-verification-using-teacher-student-network-with-explicit-inductive-bias-2009.09556"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-bengali-speech-recognition-2009.09615</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-bengali-speech-recognition-2009.09615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-bengali-speech-recognition-2009.09615"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-sound-events-using-convolutional-macaron-net-with-pseudo-strong-labels-2009.09632</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-sound-events-using-convolutional-macaron-net-with-pseudo-strong-labels-2009.09632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-sound-events-using-convolutional-macaron-net-with-pseudo-strong-labels-2009.09632"/></url>
<url><loc>https://scifaro.com/en/abs/light-convolutional-neural-network-with-feature-genuinization-for-detection-of-synthetic-speech-attacks-2009.09637</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/light-convolutional-neural-network-with-feature-genuinization-for-detection-of-synthetic-speech-attacks-2009.09637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/light-convolutional-neural-network-with-feature-genuinization-for-detection-of-synthetic-speech-attacks-2009.09637"/></url>
<url><loc>https://scifaro.com/en/abs/dcasenet-an-integrated-pretrained-deep-neural-network-for-detecting-and-classifying-acoustic-scenes-and-events-2009.09642</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcasenet-an-integrated-pretrained-deep-neural-network-for-detecting-and-classifying-acoustic-scenes-and-events-2009.09642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcasenet-an-integrated-pretrained-deep-neural-network-for-detecting-and-classifying-acoustic-scenes-and-events-2009.09642"/></url>
<url><loc>https://scifaro.com/en/abs/diffwave-a-versatile-diffusion-model-for-audio-synthesis-2009.09761</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffwave-a-versatile-diffusion-model-for-audio-synthesis-2009.09761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffwave-a-versatile-diffusion-model-for-audio-synthesis-2009.09761"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-learning-based-analysis-synthesis-framework-for-unison-singing-2009.09875</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-learning-based-analysis-synthesis-framework-for-unison-singing-2009.09875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-learning-based-analysis-synthesis-framework-for-unison-singing-2009.09875"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-dependent-voice-activity-detection-2009.09906</loc><lastmod>2020-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-dependent-voice-activity-detection-2009.09906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-dependent-voice-activity-detection-2009.09906"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-learning-of-speech-2d-feature-trajectory-for-prosthetic-hands-2009.10283</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-learning-of-speech-2d-feature-trajectory-for-prosthetic-hands-2009.10283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-learning-of-speech-2d-feature-trajectory-for-prosthetic-hands-2009.10283"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-2009.10298</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-2009.10298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-2009.10298"/></url>
<url><loc>https://scifaro.com/en/abs/a-crowdsourced-open-source-kazakh-speech-corpus-and-initial-speech-recognition-baseline-2009.10334</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-crowdsourced-open-source-kazakh-speech-corpus-and-initial-speech-recognition-baseline-2009.10334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-crowdsourced-open-source-kazakh-speech-corpus-and-initial-speech-recognition-baseline-2009.10334"/></url>
<url><loc>https://scifaro.com/en/abs/attention-driven-fusion-for-multi-modal-emotion-recognition-2009.10991</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-driven-fusion-for-multi-modal-emotion-recognition-2009.10991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-driven-fusion-for-multi-modal-emotion-recognition-2009.10991"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-learning-algorithm-for-objective-assessment-of-hypernasality-in-children-with-cleft-palate-2009.11354</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-learning-algorithm-for-objective-assessment-of-hypernasality-in-children-with-cleft-palate-2009.11354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-learning-algorithm-for-objective-assessment-of-hypernasality-in-children-with-cleft-palate-2009.11354"/></url>
<url><loc>https://scifaro.com/en/abs/fluentnet-end-to-end-detection-of-speech-disfluency-with-deep-learning-2009.11394</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fluentnet-end-to-end-detection-of-speech-disfluency-with-deep-learning-2009.11394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fluentnet-end-to-end-detection-of-speech-disfluency-with-deep-learning-2009.11394"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-word-frequency-based-pre-and-post-processings-for-audio-captioning-2009.11436</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-word-frequency-based-pre-and-post-processings-for-audio-captioning-2009.11436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-word-frequency-based-pre-and-post-processings-for-audio-captioning-2009.11436"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-dataset-for-amateur-vocal-percussion-analysis-2009.11737</loc><lastmod>2020-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-dataset-for-amateur-vocal-percussion-analysis-2009.11737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-dataset-for-amateur-vocal-percussion-analysis-2009.11737"/></url>
<url><loc>https://scifaro.com/en/abs/deep-autoencoding-gmm-based-unsupervised-anomaly-detection-in-acoustic-signals-and-its-hyper-parameter-optimization-2009.12042</loc><lastmod>2020-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-autoencoding-gmm-based-unsupervised-anomaly-detection-in-acoustic-signals-and-its-hyper-parameter-optimization-2009.12042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-autoencoding-gmm-based-unsupervised-anomaly-detection-in-acoustic-signals-and-its-hyper-parameter-optimization-2009.12042"/></url>
<url><loc>https://scifaro.com/en/abs/a-consolidated-view-of-loss-functions-for-supervised-deep-learning-based-speech-enhancement-2009.12286</loc><lastmod>2020-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-consolidated-view-of-loss-functions-for-supervised-deep-learning-based-speech-enhancement-2009.12286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-consolidated-view-of-loss-functions-for-supervised-deep-learning-based-speech-enhancement-2009.12286"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-capsule-network-for-end-to-end-speaker-recognition-in-the-wild-2009.13480</loc><lastmod>2020-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-capsule-network-for-end-to-end-speaker-recognition-in-the-wild-2009.13480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-capsule-network-for-end-to-end-speaker-recognition-in-the-wild-2009.13480"/></url>
<url><loc>https://scifaro.com/en/abs/static-and-dynamic-measures-of-active-music-listening-as-indicators-of-depression-risk-2009.13685</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/static-and-dynamic-measures-of-active-music-listening-as-indicators-of-depression-risk-2009.13685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/static-and-dynamic-measures-of-active-music-listening-as-indicators-of-depression-risk-2009.13685"/></url>
<url><loc>https://scifaro.com/en/abs/neural-language-modeling-with-implicit-cache-pointers-2009.13774</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-language-modeling-with-implicit-cache-pointers-2009.13774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-language-modeling-with-implicit-cache-pointers-2009.13774"/></url>
<url><loc>https://scifaro.com/en/abs/clova-baseline-system-for-the-voxceleb-speaker-recognition-challenge-2020-2009.14153</loc><lastmod>2020-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clova-baseline-system-for-the-voxceleb-speaker-recognition-challenge-2020-2009.14153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clova-baseline-system-for-the-voxceleb-speaker-recognition-challenge-2020-2009.14153"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-from-speech-synthesis-to-voice-conversion-with-non-parallel-training-data-2009.14399</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-from-speech-synthesis-to-voice-conversion-with-non-parallel-training-data-2009.14399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-from-speech-synthesis-to-voice-conversion-with-non-parallel-training-data-2009.14399"/></url>
<url><loc>https://scifaro.com/en/abs/embedded-emotions-a-data-driven-approach-to-learn-transferable-feature-representations-from-raw-speech-input-for-emotion-recognition-2009.14523</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedded-emotions-a-data-driven-approach-to-learn-transferable-feature-representations-from-raw-speech-input-for-emotion-recognition-2009.14523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedded-emotions-a-data-driven-approach-to-learn-transferable-feature-representations-from-raw-speech-input-for-emotion-recognition-2009.14523"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-from-monolingual-asr-to-transcription-free-cross-lingual-voice-conversion-2009.14668</loc><lastmod>2020-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-from-monolingual-asr-to-transcription-free-cross-lingual-voice-conversion-2009.14668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-from-monolingual-asr-to-transcription-free-cross-lingual-voice-conversion-2009.14668"/></url>
<url><loc>https://scifaro.com/en/abs/event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.00140</loc><lastmod>2020-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.00140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/event-independent-network-for-polyphonic-sound-event-localization-and-detection-2010.00140"/></url>
<url><loc>https://scifaro.com/en/abs/sesqa-semi-supervised-learning-for-speech-quality-assessment-2010.00368</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sesqa-semi-supervised-learning-for-speech-quality-assessment-2010.00368"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sesqa-semi-supervised-learning-for-speech-quality-assessment-2010.00368"/></url>
<url><loc>https://scifaro.com/en/abs/helicality-an-isomap-based-measure-of-octave-equivalence-in-audio-data-2010.00673</loc><lastmod>2020-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/helicality-an-isomap-based-measure-of-octave-equivalence-in-audio-data-2010.00673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/helicality-an-isomap-based-measure-of-octave-equivalence-in-audio-data-2010.00673"/></url>
<url><loc>https://scifaro.com/en/abs/training-strategies-to-handle-missing-modalities-for-audio-visual-expression-recognition-2010.00734</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-strategies-to-handle-missing-modalities-for-audio-visual-expression-recognition-2010.00734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-strategies-to-handle-missing-modalities-for-audio-visual-expression-recognition-2010.00734"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-piano-transcription-using-autoregressive-multi-state-note-model-2010.01104</loc><lastmod>2020-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-piano-transcription-using-autoregressive-multi-state-note-model-2010.01104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-piano-transcription-using-autoregressive-multi-state-note-model-2010.01104"/></url>
<url><loc>https://scifaro.com/en/abs/cardioxnet-a-novel-lightweight-deep-learning-framework-for-cardiovascular-disease-classification-using-heart-sound-recordings-2010.01392</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cardioxnet-a-novel-lightweight-deep-learning-framework-for-cardiovascular-disease-classification-using-heart-sound-recordings-2010.01392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cardioxnet-a-novel-lightweight-deep-learning-framework-for-cardiovascular-disease-classification-using-heart-sound-recordings-2010.01392"/></url>
<url><loc>https://scifaro.com/en/abs/d3net-densely-connected-multidilated-densenet-for-music-source-separation-2010.01733</loc><lastmod>2021-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/d3net-densely-connected-multidilated-densenet-for-music-source-separation-2010.01733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/d3net-densely-connected-multidilated-densenet-for-music-source-separation-2010.01733"/></url>
<url><loc>https://scifaro.com/en/abs/jsss-free-japanese-speech-corpus-for-summarization-and-simplification-2010.01793</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jsss-free-japanese-speech-corpus-for-summarization-and-simplification-2010.01793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jsss-free-japanese-speech-corpus-for-summarization-and-simplification-2010.01793"/></url>
<url><loc>https://scifaro.com/en/abs/improving-device-directedness-classification-of-utterances-with-semantic-lexical-features-2010.01949</loc><lastmod>2020-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-device-directedness-classification-of-utterances-with-semantic-lexical-features-2010.01949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-device-directedness-classification-of-utterances-with-semantic-lexical-features-2010.01949"/></url>
<url><loc>https://scifaro.com/en/abs/the-sequence-to-sequence-baseline-for-the-voice-conversion-challenge-2020-cascading-asr-and-tts-2010.02434</loc><lastmod>2020-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sequence-to-sequence-baseline-for-the-voice-conversion-challenge-2020-cascading-asr-and-tts-2010.02434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sequence-to-sequence-baseline-for-the-voice-conversion-challenge-2020-cascading-asr-and-tts-2010.02434"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-deep-learning-framework-for-short-duration-speaker-verification-in-adverse-environments-2010.02477</loc><lastmod>2020-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-deep-learning-framework-for-short-duration-speaker-verification-in-adverse-environments-2010.02477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-deep-learning-framework-for-short-duration-speaker-verification-in-adverse-environments-2010.02477"/></url>
<url><loc>https://scifaro.com/en/abs/the-academia-sinica-systems-of-voice-conversion-for-vcc2020-2010.02669</loc><lastmod>2020-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-academia-sinica-systems-of-voice-conversion-for-vcc2020-2010.02669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-academia-sinica-systems-of-voice-conversion-for-vcc2020-2010.02669"/></url>
<url><loc>https://scifaro.com/en/abs/digital-voicing-of-silent-speech-2010.02960</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-voicing-of-silent-speech-2010.02960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-voicing-of-silent-speech-2010.02960"/></url>
<url><loc>https://scifaro.com/en/abs/pkwrap-a-pytorch-package-for-lf-mmi-training-of-acoustic-models-2010.03466</loc><lastmod>2020-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pkwrap-a-pytorch-package-for-lf-mmi-training-of-acoustic-models-2010.03466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pkwrap-a-pytorch-package-for-lf-mmi-training-of-acoustic-models-2010.03466"/></url>
<url><loc>https://scifaro.com/en/abs/latent-linguistic-embedding-for-cross-lingual-text-to-speech-and-voice-conversion-2010.03717</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-linguistic-embedding-for-cross-lingual-text-to-speech-and-voice-conversion-2010.03717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-linguistic-embedding-for-cross-lingual-text-to-speech-and-voice-conversion-2010.03717"/></url>
<url><loc>https://scifaro.com/en/abs/hlt-nus-submission-for-nist-2019-multimedia-speaker-recognition-evaluation-2010.03905</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hlt-nus-submission-for-nist-2019-multimedia-speaker-recognition-evaluation-2010.03905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hlt-nus-submission-for-nist-2019-multimedia-speaker-recognition-evaluation-2010.03905"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-speech-with-and-without-face-mask-using-acoustic-features-2010.03907</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-speech-with-and-without-face-mask-using-acoustic-features-2010.03907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-speech-with-and-without-face-mask-using-acoustic-features-2010.03907"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-invariant-speaker-embeddings-for-speaker-identification-with-emotional-speech-2010.03909</loc><lastmod>2020-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-invariant-speaker-embeddings-for-speaker-identification-with-emotional-speech-2010.03909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-invariant-speaker-embeddings-for-speaker-identification-with-emotional-speech-2010.03909"/></url>
<url><loc>https://scifaro.com/en/abs/fastvc-fast-voice-conversion-with-non-parallel-data-2010.04185</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastvc-fast-voice-conversion-with-non-parallel-data-2010.04185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastvc-fast-voice-conversion-with-non-parallel-data-2010.04185"/></url>
<url><loc>https://scifaro.com/en/abs/gender-domain-adaptation-for-automatic-speech-recognition-task-2010.04224</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-domain-adaptation-for-automatic-speech-recognition-task-2010.04224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-domain-adaptation-for-automatic-speech-recognition-task-2010.04224"/></url>
<url><loc>https://scifaro.com/en/abs/all-for-one-and-one-for-all-improving-music-separation-by-bridging-networks-2010.04228</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-for-one-and-one-for-all-improving-music-separation-by-bridging-networks-2010.04228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-for-one-and-one-for-all-improving-music-separation-by-bridging-networks-2010.04228"/></url>
<url><loc>https://scifaro.com/en/abs/randomized-overdrive-neural-networks-2010.04237</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/randomized-overdrive-neural-networks-2010.04237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/randomized-overdrive-neural-networks-2010.04237"/></url>
<url><loc>https://scifaro.com/en/abs/the-nu-voice-conversion-system-for-the-voice-conversion-challenge-2020-on-the-effectiveness-of-sequence-to-sequence-models-and-autoregressive-neural-vocoders-2010.04446</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nu-voice-conversion-system-for-the-voice-conversion-challenge-2020-on-the-effectiveness-of-sequence-to-sequence-models-and-autoregressive-neural-vocoders-2010.04446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nu-voice-conversion-system-for-the-voice-conversion-challenge-2020-on-the-effectiveness-of-sequence-to-sequence-models-and-autoregressive-neural-vocoders-2010.04446"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-music-super-resolution-using-generative-adversarial-networks-2010.04506</loc><lastmod>2020-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-music-super-resolution-using-generative-adversarial-networks-2010.04506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-music-super-resolution-using-generative-adversarial-networks-2010.04506"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-inpainting-with-deep-learning-2010.04556</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-inpainting-with-deep-learning-2010.04556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-inpainting-with-deep-learning-2010.04556"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-coded-speech-using-a-mask-based-post-filter-2010.05571</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-coded-speech-using-a-mask-based-post-filter-2010.05571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-coded-speech-using-a-mask-based-post-filter-2010.05571"/></url>
<url><loc>https://scifaro.com/en/abs/designing-a-9-channel-location-microphone-from-scratch-2010.05877</loc><lastmod>2020-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/designing-a-9-channel-location-microphone-from-scratch-2010.05877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/designing-a-9-channel-location-microphone-from-scratch-2010.05877"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-universal-speech-attributes-for-speaker-verification-with-an-improved-cross-stitch-network-2010.06248</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-universal-speech-attributes-for-speaker-verification-with-an-improved-cross-stitch-network-2010.06248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-universal-speech-attributes-for-speaker-verification-with-an-improved-cross-stitch-network-2010.06248"/></url>
<url><loc>https://scifaro.com/en/abs/novel-architectures-for-unsupervised-information-bottleneck-based-speaker-diarization-of-meetings-2010.06304</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-architectures-for-unsupervised-information-bottleneck-based-speaker-diarization-of-meetings-2010.06304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-architectures-for-unsupervised-information-bottleneck-based-speaker-diarization-of-meetings-2010.06304"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-based-on-crnn-using-rectangular-filters-and-channel-rotation-data-augmentation-2010.06422</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-based-on-crnn-using-rectangular-filters-and-channel-rotation-data-augmentation-2010.06422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-based-on-crnn-using-rectangular-filters-and-channel-rotation-data-augmentation-2010.06422"/></url>
<url><loc>https://scifaro.com/en/abs/towards-data-efficient-modeling-for-wake-word-spotting-2010.06659</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-data-efficient-modeling-for-wake-word-spotting-2010.06659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-data-efficient-modeling-for-wake-word-spotting-2010.06659"/></url>
<url><loc>https://scifaro.com/en/abs/on-front-end-gain-invariant-modeling-for-wake-word-spotting-2010.06676</loc><lastmod>2020-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-front-end-gain-invariant-modeling-for-wake-word-spotting-2010.06676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-front-end-gain-invariant-modeling-for-wake-word-spotting-2010.06676"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-end-to-end-speech-recognition-from-raw-audio-data-using-sinc-convolutions-2010.07597</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-end-to-end-speech-recognition-from-raw-audio-data-using-sinc-convolutions-2010.07597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-end-to-end-speech-recognition-from-raw-audio-data-using-sinc-convolutions-2010.07597"/></url>
<url><loc>https://scifaro.com/en/abs/muse-multi-modal-target-speaker-extraction-with-visual-cues-2010.07775</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muse-multi-modal-target-speaker-extraction-with-visual-cues-2010.07775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muse-multi-modal-target-speaker-extraction-with-visual-cues-2010.07775"/></url>
<url><loc>https://scifaro.com/en/abs/dataset-artefacts-in-anti-spoofing-systems-a-case-study-on-the-asvspoof-2017-benchmark-2010.07913</loc><lastmod>2020-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dataset-artefacts-in-anti-spoofing-systems-a-case-study-on-the-asvspoof-2017-benchmark-2010.07913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dataset-artefacts-in-anti-spoofing-systems-a-case-study-on-the-asvspoof-2017-benchmark-2010.07913"/></url>
<url><loc>https://scifaro.com/en/abs/tongji-university-team-for-the-voxceleb-speaker-recognition-challenge-2020-2010.08179</loc><lastmod>2020-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tongji-university-team-for-the-voxceleb-speaker-recognition-challenge-2020-2010.08179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tongji-university-team-for-the-voxceleb-speaker-recognition-challenge-2020-2010.08179"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-manifest-huntington-disease-using-vowel-distortion-measures-2010.08503</loc><lastmod>2020-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-manifest-huntington-disease-using-vowel-distortion-measures-2010.08503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-manifest-huntington-disease-using-vowel-distortion-measures-2010.08503"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-using-automatic-speech-recognition-derived-measures-2010.08574</loc><lastmod>2021-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-using-automatic-speech-recognition-derived-measures-2010.08574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-using-automatic-speech-recognition-derived-measures-2010.08574"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-spectro-temporal-receptive-fields-for-robust-voice-type-discrimination-2010.09151</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-spectro-temporal-receptive-fields-for-robust-voice-type-discrimination-2010.09151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-spectro-temporal-receptive-fields-for-robust-voice-type-discrimination-2010.09151"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-target-speech-extraction-with-channel-decorrelation-and-target-speaker-adaptation-2010.09191</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-target-speech-extraction-with-channel-decorrelation-and-target-speaker-adaptation-2010.09191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-target-speech-extraction-with-channel-decorrelation-and-target-speaker-adaptation-2010.09191"/></url>
<url><loc>https://scifaro.com/en/abs/didispeech-a-large-scale-mandarin-speech-corpus-2010.09275</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/didispeech-a-large-scale-mandarin-speech-corpus-2010.09275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/didispeech-a-large-scale-mandarin-speech-corpus-2010.09275"/></url>
<url><loc>https://scifaro.com/en/abs/reduce-and-reconstruct-asr-for-low-resource-phonetic-languages-2010.09322</loc><lastmod>2021-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduce-and-reconstruct-asr-for-low-resource-phonetic-languages-2010.09322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduce-and-reconstruct-asr-for-low-resource-phonetic-languages-2010.09322"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-text-to-speech-using-latent-duration-based-on-vq-vae-2010.09602</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-text-to-speech-using-latent-duration-based-on-vq-vae-2010.09602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-text-to-speech-using-latent-duration-based-on-vq-vae-2010.09602"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-multi-scale-temporal-convolution-2010.09960</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-multi-scale-temporal-convolution-2010.09960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-keyword-spotting-with-multi-scale-temporal-convolution-2010.09960"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-multitrack-mixing-with-a-differentiable-mixing-console-of-neural-audio-effects-2010.10291</loc><lastmod>2020-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-multitrack-mixing-with-a-differentiable-mixing-console-of-neural-audio-effects-2010.10291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-multitrack-mixing-with-a-differentiable-mixing-console-of-neural-audio-effects-2010.10291"/></url>
<url><loc>https://scifaro.com/en/abs/pushing-the-limits-of-semi-supervised-learning-for-automatic-speech-recognition-2010.10504</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pushing-the-limits-of-semi-supervised-learning-for-automatic-speech-recognition-2010.10504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pushing-the-limits-of-semi-supervised-learning-for-automatic-speech-recognition-2010.10504"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-transfer-for-efficient-on-device-false-trigger-mitigation-2010.10591</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-transfer-for-efficient-on-device-false-trigger-mitigation-2010.10591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-transfer-for-efficient-on-device-false-trigger-mitigation-2010.10591"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-frequency-bandwidth-extension-2010.10677</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-frequency-bandwidth-extension-2010.10677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-frequency-bandwidth-extension-2010.10677"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-covid-19-through-the-analysis-of-vocal-fold-oscillations-2010.10707</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-covid-19-through-the-analysis-of-vocal-fold-oscillations-2010.10707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-covid-19-through-the-analysis-of-vocal-fold-oscillations-2010.10707"/></url>
<url><loc>https://scifaro.com/en/abs/learning-disentangled-phone-and-speaker-representations-in-a-semi-supervised-vq-vae-paradigm-2010.10727</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-disentangled-phone-and-speaker-representations-in-a-semi-supervised-vq-vae-paradigm-2010.10727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-disentangled-phone-and-speaker-representations-in-a-semi-supervised-vq-vae-paradigm-2010.10727"/></url>
<url><loc>https://scifaro.com/en/abs/bert-for-joint-multichannel-speech-dereverberation-with-spatial-aware-tasks-2010.10892</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bert-for-joint-multichannel-speech-dereverberation-with-spatial-aware-tasks-2010.10892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bert-for-joint-multichannel-speech-dereverberation-with-spatial-aware-tasks-2010.10892"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-metric-learning-for-text-independent-speaker-verification-2010.10919</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-metric-learning-for-text-independent-speaker-verification-2010.10919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-metric-learning-for-text-independent-speaker-verification-2010.10919"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-scaling-adaptation-for-target-speech-extraction-2010.10923</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-scaling-adaptation-for-target-speech-extraction-2010.10923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-scaling-adaptation-for-target-speech-extraction-2010.10923"/></url>
<url><loc>https://scifaro.com/en/abs/the-upc-speaker-verification-system-submitted-to-voxceleb-speaker-recognition-challenge-2020-voxsrc-20-2010.10937</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-upc-speaker-verification-system-submitted-to-voxceleb-speaker-recognition-challenge-2020-voxsrc-20-2010.10937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-upc-speaker-verification-system-submitted-to-voxceleb-speaker-recognition-challenge-2020-voxsrc-20-2010.10937"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-the-recitative-problem-in-real-time-opera-tracking-2010.11013</loc><lastmod>2020-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-the-recitative-problem-in-real-time-opera-tracking-2010.11013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-the-recitative-problem-in-real-time-opera-tracking-2010.11013"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-training-of-automatic-speech-recognition-for-nigerian-pidgin-2010.11123</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-training-of-automatic-speech-recognition-for-nigerian-pidgin-2010.11123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-training-of-automatic-speech-recognition-for-nigerian-pidgin-2010.11123"/></url>
<url><loc>https://scifaro.com/en/abs/fastemit-low-latency-streaming-asr-with-sequence-level-emission-regularization-2010.11148</loc><lastmod>2021-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastemit-low-latency-streaming-asr-with-sequence-level-emission-regularization-2010.11148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastemit-low-latency-streaming-asr-with-sequence-level-emission-regularization-2010.11148"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speaker-embedding-from-text-to-speech-2010.11221</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speaker-embedding-from-text-to-speech-2010.11221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speaker-embedding-from-text-to-speech-2010.11221"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-anomalies-recognition-using-temporal-convolutional-attention-network-2010.11286</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-anomalies-recognition-using-temporal-convolutional-attention-network-2010.11286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-anomalies-recognition-using-temporal-convolutional-attention-network-2010.11286"/></url>
<url><loc>https://scifaro.com/en/abs/robust-text-dependent-speaker-verification-via-character-level-information-preservation-for-the-sdsv-challenge-2020-2010.11408</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-text-dependent-speaker-verification-via-character-level-information-preservation-for-the-sdsv-challenge-2020-2010.11408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-text-dependent-speaker-verification-via-character-level-information-preservation-for-the-sdsv-challenge-2020-2010.11408"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-estimation-for-attention-based-sequence-to-sequence-models-for-speech-recognition-2010.11428</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-estimation-for-attention-based-sequence-to-sequence-models-for-speech-recognition-2010.11428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-estimation-for-attention-based-sequence-to-sequence-models-for-speech-recognition-2010.11428"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-representation-learning-for-speaker-recognition-via-contrastive-equilibrium-learning-2010.11433</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-representation-learning-for-speaker-recognition-via-contrastive-equilibrium-learning-2010.11433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-representation-learning-for-speaker-recognition-via-contrastive-equilibrium-learning-2010.11433"/></url>
<url><loc>https://scifaro.com/en/abs/momentum-contrast-speaker-representation-learning-2010.11457</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/momentum-contrast-speaker-representation-learning-2010.11457"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/momentum-contrast-speaker-representation-learning-2010.11457"/></url>
<url><loc>https://scifaro.com/en/abs/microsoft-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2020-2010.11458</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microsoft-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2020-2010.11458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microsoft-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2020-2010.11458"/></url>
<url><loc>https://scifaro.com/en/abs/similarity-analysis-of-self-supervised-speech-representations-2010.11481</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/similarity-analysis-of-self-supervised-speech-representations-2010.11481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/similarity-analysis-of-self-supervised-speech-representations-2010.11481"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-approach-to-joint-speech-and-accent-recognition-with-dnn-hmm-framework-2010.11483</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-approach-to-joint-speech-and-accent-recognition-with-dnn-hmm-framework-2010.11483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-approach-to-joint-speech-and-accent-recognition-with-dnn-hmm-framework-2010.11483"/></url>
<url><loc>https://scifaro.com/en/abs/the-ntu-aisg-text-to-speech-system-for-blizzard-challenge-2020-2010.11489</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ntu-aisg-text-to-speech-system-for-blizzard-challenge-2020-2010.11489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ntu-aisg-text-to-speech-system-for-blizzard-challenge-2020-2010.11489"/></url>
<url><loc>https://scifaro.com/en/abs/graph-attention-networks-for-speaker-verification-2010.11543</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-attention-networks-for-speaker-verification-2010.11543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-attention-networks-for-speaker-verification-2010.11543"/></url>
<url><loc>https://scifaro.com/en/abs/how-similar-or-different-is-rakugo-speech-synthesizer-to-professional-performers-2010.11549</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-similar-or-different-is-rakugo-speech-synthesizer-to-professional-performers-2010.11549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-similar-or-different-is-rakugo-speech-synthesizer-to-professional-performers-2010.11549"/></url>
<url><loc>https://scifaro.com/en/abs/dbnet-doa-driven-beamforming-network-for-end-to-end-farfield-sound-source-separation-2010.11566</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dbnet-doa-driven-beamforming-network-for-end-to-end-farfield-sound-source-separation-2010.11566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dbnet-doa-driven-beamforming-network-for-end-to-end-farfield-sound-source-separation-2010.11566"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-the-but-diarization-system-for-voxconverse-challenge-2010.11718</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-the-but-diarization-system-for-voxconverse-challenge-2010.11718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-the-but-diarization-system-for-voxconverse-challenge-2010.11718"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-low-power-on-chip-learning-of-speech-commands-with-phase-change-memories-2010.11741</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-low-power-on-chip-learning-of-speech-commands-with-phase-change-memories-2010.11741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-low-power-on-chip-learning-of-speech-commands-with-phase-change-memories-2010.11741"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-loss-based-speech-denoising-with-an-ensemble-of-audio-pattern-recognition-and-self-supervised-models-2010.11860</loc><lastmod>2020-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-loss-based-speech-denoising-with-an-ensemble-of-audio-pattern-recognition-and-self-supervised-models-2010.11860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-loss-based-speech-denoising-with-an-ensemble-of-audio-pattern-recognition-and-self-supervised-models-2010.11860"/></url>
<url><loc>https://scifaro.com/en/abs/scene-agnostic-multi-microphone-speech-dereverberation-2010.11875</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scene-agnostic-multi-microphone-speech-dereverberation-2010.11875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scene-agnostic-multi-microphone-speech-dereverberation-2010.11875"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-singing-voice-synthesis-with-perceptual-entropy-loss-2010.12024</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-singing-voice-synthesis-with-perceptual-entropy-loss-2010.12024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-singing-voice-synthesis-with-perceptual-entropy-loss-2010.12024"/></url>
<url><loc>https://scifaro.com/en/abs/how-phonotactics-affect-multilingual-and-zero-shot-asr-performance-2010.12104</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-phonotactics-affect-multilingual-and-zero-shot-asr-performance-2010.12104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-phonotactics-affect-multilingual-and-zero-shot-asr-performance-2010.12104"/></url>
<url><loc>https://scifaro.com/en/abs/a-cross-verification-approach-for-protecting-world-leaders-from-fake-and-tampered-audio-2010.12173</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cross-verification-approach-for-protecting-world-leaders-from-fake-and-tampered-audio-2010.12173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cross-verification-approach-for-protecting-world-leaders-from-fake-and-tampered-audio-2010.12173"/></url>
<url><loc>https://scifaro.com/en/abs/toward-expressive-singing-voice-correction-on-perceptual-validity-of-evaluation-metrics-for-vocal-melody-extraction-2010.12196</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-expressive-singing-voice-correction-on-perceptual-validity-of-evaluation-metrics-for-vocal-melody-extraction-2010.12196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-expressive-singing-voice-correction-on-perceptual-validity-of-evaluation-metrics-for-vocal-melody-extraction-2010.12196"/></url>
<url><loc>https://scifaro.com/en/abs/any-to-one-sequence-to-sequence-voice-conversion-using-self-supervised-discrete-speech-representations-2010.12231</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/any-to-one-sequence-to-sequence-voice-conversion-using-self-supervised-discrete-speech-representations-2010.12231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/any-to-one-sequence-to-sequence-voice-conversion-using-self-supervised-discrete-speech-representations-2010.12231"/></url>
<url><loc>https://scifaro.com/en/abs/training-noisy-single-channel-speech-separation-with-noisy-oracle-sources-a-large-gap-and-a-small-step-2010.12430</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-noisy-single-channel-speech-separation-with-noisy-oracle-sources-a-large-gap-and-a-small-step-2010.12430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-noisy-single-channel-speech-separation-with-noisy-oracle-sources-a-large-gap-and-a-small-step-2010.12430"/></url>
<url><loc>https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2020-system-description-2010.12468</loc><lastmod>2020-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2020-system-description-2010.12468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2020-system-description-2010.12468"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-aided-end-to-end-multi-task-learning-for-voice-activity-detection-2010.12484</loc><lastmod>2021-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-aided-end-to-end-multi-task-learning-for-voice-activity-detection-2010.12484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-aided-end-to-end-multi-task-learning-for-voice-activity-detection-2010.12484"/></url>
<url><loc>https://scifaro.com/en/abs/speakernet-1d-depth-wise-separable-convolutional-network-for-text-independent-speaker-recognition-and-verification-2010.12653</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speakernet-1d-depth-wise-separable-convolutional-network-for-text-independent-speaker-recognition-and-verification-2010.12653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speakernet-1d-depth-wise-separable-convolutional-network-for-text-independent-speaker-recognition-and-verification-2010.12653"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speaker-verification-for-single-and-multi-talker-speech-2010.12692</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speaker-verification-for-single-and-multi-talker-speech-2010.12692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speaker-verification-for-single-and-multi-talker-speech-2010.12692"/></url>
<url><loc>https://scifaro.com/en/abs/improving-noise-robustness-of-an-end-to-end-neural-model-for-automatic-speech-recognition-2010.12715</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-noise-robustness-of-an-end-to-end-neural-model-for-automatic-speech-recognition-2010.12715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-noise-robustness-of-an-end-to-end-neural-model-for-automatic-speech-recognition-2010.12715"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-dukeece-systems-for-voxceleb-speaker-recognition-challenge-2020-2010.12731</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-dukeece-systems-for-voxceleb-speaker-recognition-challenge-2020-2010.12731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-dukeece-systems-for-voxceleb-speaker-recognition-challenge-2020-2010.12731"/></url>
<url><loc>https://scifaro.com/en/abs/x-tasnet-robust-and-accurate-time-domain-speaker-extraction-network-2010.12766</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-tasnet-robust-and-accurate-time-domain-speaker-extraction-network-2010.12766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-tasnet-robust-and-accurate-time-domain-speaker-extraction-network-2010.12766"/></url>
<url><loc>https://scifaro.com/en/abs/y-vector-multiscale-waveform-encoder-for-speaker-embedding-2010.12951</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/y-vector-multiscale-waveform-encoder-for-speaker-embedding-2010.12951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/y-vector-multiscale-waveform-encoder-for-speaker-embedding-2010.12951"/></url>
<url><loc>https://scifaro.com/en/abs/probing-acoustic-representations-for-phonetic-properties-2010.13007</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-acoustic-representations-for-phonetic-properties-2010.13007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-acoustic-representations-for-phonetic-properties-2010.13007"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourcing-approach-for-subjective-evaluation-of-echo-impairment-2010.13063</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourcing-approach-for-subjective-evaluation-of-echo-impairment-2010.13063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourcing-approach-for-subjective-evaluation-of-echo-impairment-2010.13063"/></url>
<url><loc>https://scifaro.com/en/abs/attention-is-all-you-need-in-speech-separation-2010.13154</loc><lastmod>2021-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-is-all-you-need-in-speech-separation-2010.13154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-is-all-you-need-in-speech-separation-2010.13154"/></url>
<url><loc>https://scifaro.com/en/abs/subjective-evaluation-of-noise-suppression-algorithms-in-crowdsourcing-2010.13200</loc><lastmod>2021-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subjective-evaluation-of-noise-suppression-algorithms-in-crowdsourcing-2010.13200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subjective-evaluation-of-noise-suppression-algorithms-in-crowdsourcing-2010.13200"/></url>
<url><loc>https://scifaro.com/en/abs/improved-mask-ctc-for-non-autoregressive-end-to-end-asr-2010.13270</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-mask-ctc-for-non-autoregressive-end-to-end-asr-2010.13270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-mask-ctc-for-non-autoregressive-end-to-end-asr-2010.13270"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-node-specific-block-diagonal-lcmv-beamforming-in-wireless-acoustic-sensor-networks-2010.13334</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-node-specific-block-diagonal-lcmv-beamforming-in-wireless-acoustic-sensor-networks-2010.13334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-node-specific-block-diagonal-lcmv-beamforming-in-wireless-acoustic-sensor-networks-2010.13334"/></url>
<url><loc>https://scifaro.com/en/abs/improving-pronunciation-assessment-via-ordinal-regression-with-anchored-reference-samples-2010.13339</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-pronunciation-assessment-via-ordinal-regression-with-anchored-reference-samples-2010.13339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-pronunciation-assessment-via-ordinal-regression-with-anchored-reference-samples-2010.13339"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-controllable-speech-synthesis-using-emotion-unlabeled-dataset-with-the-assistance-of-cross-domain-speech-emotion-recognition-2010.13350</loc><lastmod>2021-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-controllable-speech-synthesis-using-emotion-unlabeled-dataset-with-the-assistance-of-cross-domain-speech-emotion-recognition-2010.13350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-controllable-speech-synthesis-using-emotion-unlabeled-dataset-with-the-assistance-of-cross-domain-speech-emotion-recognition-2010.13350"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-end-to-end-neural-and-clustering-based-diarization-getting-the-best-of-both-worlds-2010.13366</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-end-to-end-neural-and-clustering-based-diarization-getting-the-best-of-both-worlds-2010.13366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-end-to-end-neural-and-clustering-based-diarization-getting-the-best-of-both-worlds-2010.13366"/></url>
<url><loc>https://scifaro.com/en/abs/tts-by-tts-tts-driven-data-augmentation-for-fast-and-high-quality-speech-synthesis-2010.13421</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-by-tts-tts-driven-data-augmentation-for-fast-and-high-quality-speech-synthesis-2010.13421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-by-tts-tts-driven-data-augmentation-for-fast-and-high-quality-speech-synthesis-2010.13421"/></url>
<url><loc>https://scifaro.com/en/abs/improving-sound-event-detection-metrics-insights-from-dcase-2020-2010.13648</loc><lastmod>2020-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-sound-event-detection-metrics-insights-from-dcase-2020-2010.13648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-sound-event-detection-metrics-insights-from-dcase-2020-2010.13648"/></url>
<url><loc>https://scifaro.com/en/abs/marblenet-deep-1d-time-channel-separable-convolutional-neural-network-for-voice-activity-detection-2010.13886</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/marblenet-deep-1d-time-channel-separable-convolutional-neural-network-for-voice-activity-detection-2010.13886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/marblenet-deep-1d-time-channel-separable-convolutional-neural-network-for-voice-activity-detection-2010.13886"/></url>
<url><loc>https://scifaro.com/en/abs/recent-developments-on-espnet-toolkit-boosted-by-conformer-2010.13956</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-developments-on-espnet-toolkit-boosted-by-conformer-2010.13956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-developments-on-espnet-toolkit-boosted-by-conformer-2010.13956"/></url>
<url><loc>https://scifaro.com/en/abs/one-class-learning-towards-synthetic-voice-spoofing-detection-2010.13995</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-class-learning-towards-synthetic-voice-spoofing-detection-2010.13995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-class-learning-towards-synthetic-voice-spoofing-detection-2010.13995"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-speech-enhancement-using-realisation-of-complex-valued-lstm-2010.14122</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-using-realisation-of-complex-valued-lstm-2010.14122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-speech-enhancement-using-realisation-of-complex-valued-lstm-2010.14122"/></url>
<url><loc>https://scifaro.com/en/abs/fragmentvc-any-to-any-voice-conversion-by-end-to-end-extracting-and-fusing-fine-grained-voice-fragments-with-attention-2010.14150</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fragmentvc-any-to-any-voice-conversion-by-end-to-end-extracting-and-fusing-fine-grained-voice-fragments-with-attention-2010.14150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fragmentvc-any-to-any-voice-conversion-by-end-to-end-extracting-and-fusing-fine-grained-voice-fragments-with-attention-2010.14150"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-waveform-synthesis-based-on-generative-adversarial-networks-with-voicing-aware-conditional-discriminators-2010.14151</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-waveform-synthesis-based-on-generative-adversarial-networks-with-voicing-aware-conditional-discriminators-2010.14151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-waveform-synthesis-based-on-generative-adversarial-networks-with-voicing-aware-conditional-discriminators-2010.14151"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-discrete-latent-variable-models-for-speech-representation-learning-2010.14230</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-discrete-latent-variable-models-for-speech-representation-learning-2010.14230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-discrete-latent-variable-models-for-speech-representation-learning-2010.14230"/></url>
<url><loc>https://scifaro.com/en/abs/align-refine-non-autoregressive-speech-recognition-via-iterative-realignment-2010.14233</loc><lastmod>2020-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/align-refine-non-autoregressive-speech-recognition-via-iterative-realignment-2010.14233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/align-refine-non-autoregressive-speech-recognition-via-iterative-realignment-2010.14233"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-echo-cancellation-with-the-dual-signal-transformation-lstm-network-2010.14337</loc><lastmod>2020-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-with-the-dual-signal-transformation-lstm-network-2010.14337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-with-the-dual-signal-transformation-lstm-network-2010.14337"/></url>
<url><loc>https://scifaro.com/en/abs/sslide-sound-source-localization-for-indoors-based-on-deep-learning-2010.14420</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sslide-sound-source-localization-for-indoors-based-on-deep-learning-2010.14420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sslide-sound-source-localization-for-indoors-based-on-deep-learning-2010.14420"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-encoders-for-unifying-streaming-and-non-streaming-asr-2010.14606</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-encoders-for-unifying-streaming-and-non-streaming-asr-2010.14606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-encoders-for-unifying-streaming-and-non-streaming-asr-2010.14606"/></url>
<url><loc>https://scifaro.com/en/abs/cass-nat-ctc-alignment-based-single-step-non-autoregressive-transformer-for-speech-recognition-2010.14725</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cass-nat-ctc-alignment-based-single-step-non-autoregressive-transformer-for-speech-recognition-2010.14725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cass-nat-ctc-alignment-based-single-step-non-autoregressive-transformer-for-speech-recognition-2010.14725"/></url>
<url><loc>https://scifaro.com/en/abs/an-iterative-framework-for-self-supervised-deep-speaker-representation-learning-2010.14751</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-iterative-framework-for-self-supervised-deep-speaker-representation-learning-2010.14751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-iterative-framework-for-self-supervised-deep-speaker-representation-learning-2010.14751"/></url>
<url><loc>https://scifaro.com/en/abs/effective-decoder-masking-for-transformer-based-end-to-end-speech-recognition-2010.14764</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-decoder-masking-for-transformer-based-end-to-end-speech-recognition-2010.14764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-decoder-masking-for-transformer-based-end-to-end-speech-recognition-2010.14764"/></url>
<url><loc>https://scifaro.com/en/abs/one-in-a-hundred-select-the-best-predicted-sequence-from-numerous-candidates-for-streaming-speech-recognition-2010.14791</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-in-a-hundred-select-the-best-predicted-sequence-from-numerous-candidates-for-streaming-speech-recognition-2010.14791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-in-a-hundred-select-the-best-predicted-sequence-from-numerous-candidates-for-streaming-speech-recognition-2010.14791"/></url>
<url><loc>https://scifaro.com/en/abs/replay-and-synthetic-speech-detection-with-res2net-architecture-2010.15006</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/replay-and-synthetic-speech-detection-with-res2net-architecture-2010.15006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/replay-and-synthetic-speech-detection-with-res2net-architecture-2010.15006"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-short-time-fourier-transform-parameters-via-gradient-descent-2010.15049</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-short-time-fourier-transform-parameters-via-gradient-descent-2010.15049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-short-time-fourier-transform-parameters-via-gradient-descent-2010.15049"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-and-control-using-differentiable-dsp-2010.15084</loc><lastmod>2020-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-and-control-using-differentiable-dsp-2010.15084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-and-control-using-differentiable-dsp-2010.15084"/></url>
<url><loc>https://scifaro.com/en/abs/accdoa-activity-coupled-cartesian-direction-of-arrival-representation-for-sound-event-localization-and-detection-2010.15306</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accdoa-activity-coupled-cartesian-direction-of-arrival-representation-for-sound-event-localization-and-detection-2010.15306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accdoa-activity-coupled-cartesian-direction-of-arrival-representation-for-sound-event-localization-and-detection-2010.15306"/></url>
<url><loc>https://scifaro.com/en/abs/devicetts-a-small-footprint-fast-stable-network-for-on-device-text-to-speech-2010.15311</loc><lastmod>2021-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/devicetts-a-small-footprint-fast-stable-network-for-on-device-text-to-speech-2010.15311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/devicetts-a-small-footprint-fast-stable-network-for-on-device-text-to-speech-2010.15311"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-voice-trigger-detection-accuracy-vs-latency-2010.15446</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-voice-trigger-detection-accuracy-vs-latency-2010.15446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-voice-trigger-detection-accuracy-vs-latency-2010.15446"/></url>
<url><loc>https://scifaro.com/en/abs/fullsubnet-a-full-band-and-sub-band-fusion-model-for-real-time-single-channel-speech-enhancement-2010.15508</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fullsubnet-a-full-band-and-sub-band-fusion-model-for-real-time-single-channel-speech-enhancement-2010.15508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fullsubnet-a-full-band-and-sub-band-fusion-model-for-real-time-single-channel-speech-enhancement-2010.15508"/></url>
<url><loc>https://scifaro.com/en/abs/unetgan-a-robust-speech-enhancement-approach-in-time-domain-for-extremely-low-signal-to-noise-ratio-condition-2010.15521</loc><lastmod>2020-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unetgan-a-robust-speech-enhancement-approach-in-time-domain-for-extremely-low-signal-to-noise-ratio-condition-2010.15521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unetgan-a-robust-speech-enhancement-approach-in-time-domain-for-extremely-low-signal-to-noise-ratio-condition-2010.15521"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-defense-for-deep-speaker-recognition-using-hybrid-adversarial-training-2010.16038</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-defense-for-deep-speaker-recognition-using-hybrid-adversarial-training-2010.16038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-defense-for-deep-speaker-recognition-using-hybrid-adversarial-training-2010.16038"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-speaker-role-recognition-and-speaker-enrollment-protocol-for-conversational-clinical-interviews-2010.16131</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-speaker-role-recognition-and-speaker-enrollment-protocol-for-conversational-clinical-interviews-2010.16131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-speaker-role-recognition-and-speaker-enrollment-protocol-for-conversational-clinical-interviews-2010.16131"/></url>
<url><loc>https://scifaro.com/en/abs/beamforming-for-measurements-under-disturbed-propagation-conditions-using-numerically-calculated-green-s-functions-2010.16140</loc><lastmod>2020-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beamforming-for-measurements-under-disturbed-propagation-conditions-using-numerically-calculated-green-s-functions-2010.16140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beamforming-for-measurements-under-disturbed-propagation-conditions-using-numerically-calculated-green-s-functions-2010.16140"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-glottal-flow-dynamics-for-detecting-covid-19-from-voice-2010.16318</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-glottal-flow-dynamics-for-detecting-covid-19-from-voice-2010.16318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-glottal-flow-dynamics-for-detecting-covid-19-from-voice-2010.16318"/></url>
<url><loc>https://scifaro.com/en/abs/audio-dequantization-using-co-sparse-non-convex-methods-2010.16386</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-dequantization-using-co-sparse-non-convex-methods-2010.16386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-dequantization-using-co-sparse-non-convex-methods-2010.16386"/></url>
<url><loc>https://scifaro.com/en/abs/a-curated-dataset-of-urban-scenes-for-audio-visual-scene-analysis-2011.00030</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-curated-dataset-of-urban-scenes-for-audio-visual-scene-analysis-2011.00030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-curated-dataset-of-urban-scenes-for-audio-visual-scene-analysis-2011.00030"/></url>
<url><loc>https://scifaro.com/en/abs/directional-asr-a-new-paradigm-for-e2e-multi-speaker-speech-recognition-with-source-localization-2011.00091</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-asr-a-new-paradigm-for-e2e-multi-speaker-speech-recognition-with-source-localization-2011.00091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-asr-a-new-paradigm-for-e2e-multi-speaker-speech-recognition-with-source-localization-2011.00091"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-urban-sound-tagging-with-spatiotemporal-context-2011.00175</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-urban-sound-tagging-with-spatiotemporal-context-2011.00175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-urban-sound-tagging-with-spatiotemporal-context-2011.00175"/></url>
<url><loc>https://scifaro.com/en/abs/again-vc-a-one-shot-voice-conversion-using-activation-guidance-and-adaptive-instance-normalization-2011.00316</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/again-vc-a-one-shot-voice-conversion-using-activation-guidance-and-adaptive-instance-normalization-2011.00316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/again-vc-a-one-shot-voice-conversion-using-activation-guidance-and-adaptive-instance-normalization-2011.00316"/></url>
<url><loc>https://scifaro.com/en/abs/focusing-phenomena-in-linear-discrete-inverse-problems-in-acoustics-2011.00502</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/focusing-phenomena-in-linear-discrete-inverse-problems-in-acoustics-2011.00502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/focusing-phenomena-in-linear-discrete-inverse-problems-in-acoustics-2011.00502"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-arabic-dialect-identification-2011.00699</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-arabic-dialect-identification-2011.00699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-arabic-dialect-identification-2011.00699"/></url>
<url><loc>https://scifaro.com/en/abs/robust-raw-waveform-speech-recognition-using-relevance-weighted-representations-2011.00721</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-raw-waveform-speech-recognition-using-relevance-weighted-representations-2011.00721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-raw-waveform-speech-recognition-using-relevance-weighted-representations-2011.00721"/></url>
<url><loc>https://scifaro.com/en/abs/feathertts-robust-and-efficient-attention-based-neural-tts-2011.00935</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feathertts-robust-and-efficient-attention-based-neural-tts-2011.00935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feathertts-robust-and-efficient-attention-based-neural-tts-2011.00935"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-anti-spoofing-with-rawnet2-2011.01108</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-anti-spoofing-with-rawnet2-2011.01108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-anti-spoofing-with-rawnet2-2011.01108"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymisation-using-the-mcadams-coefficient-2011.01130</loc><lastmod>2021-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymisation-using-the-mcadams-coefficient-2011.01130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymisation-using-the-mcadams-coefficient-2011.01130"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-maximize-speech-quality-directly-using-mos-prediction-for-neural-text-to-speech-2011.01174</loc><lastmod>2022-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-maximize-speech-quality-directly-using-mos-prediction-for-neural-text-to-speech-2011.01174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-maximize-speech-quality-directly-using-mos-prediction-for-neural-text-to-speech-2011.01174"/></url>
<url><loc>https://scifaro.com/en/abs/camp-a-two-stage-approach-to-modelling-prosody-in-context-2011.01175</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/camp-a-two-stage-approach-to-modelling-prosody-in-context-2011.01175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/camp-a-two-stage-approach-to-modelling-prosody-in-context-2011.01175"/></url>
<url><loc>https://scifaro.com/en/abs/focus-on-the-present-a-regularization-method-for-the-asr-source-target-attention-layer-2011.01210</loc><lastmod>2020-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/focus-on-the-present-a-regularization-method-for-the-asr-source-target-attention-layer-2011.01210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/focus-on-the-present-a-regularization-method-for-the-asr-source-target-attention-layer-2011.01210"/></url>
<url><loc>https://scifaro.com/en/abs/stylemelgan-an-efficient-high-fidelity-adversarial-vocoder-with-temporal-adaptive-normalization-2011.01557</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylemelgan-an-efficient-high-fidelity-adversarial-vocoder-with-temporal-adaptive-normalization-2011.01557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylemelgan-an-efficient-high-fidelity-adversarial-vocoder-with-temporal-adaptive-normalization-2011.01557"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-latency-speech-recognition-with-asynchronous-revision-2011.01570</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-latency-speech-recognition-with-asynchronous-revision-2011.01570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-latency-speech-recognition-with-asynchronous-revision-2011.01570"/></url>
<url><loc>https://scifaro.com/en/abs/improving-rnn-transducer-with-normalized-jointer-network-2011.01576</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-rnn-transducer-with-normalized-jointer-network-2011.01576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-rnn-transducer-with-normalized-jointer-network-2011.01576"/></url>
<url><loc>https://scifaro.com/en/abs/learning-explicit-prosody-models-and-deep-speaker-embeddings-for-atypical-voice-conversion-2011.01678</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-explicit-prosody-models-and-deep-speaker-embeddings-for-atypical-voice-conversion-2011.01678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-explicit-prosody-models-and-deep-speaker-embeddings-for-atypical-voice-conversion-2011.01678"/></url>
<url><loc>https://scifaro.com/en/abs/improved-end-to-end-dysarthric-speech-recognition-via-meta-learning-based-model-re-initialization-2011.01686</loc><lastmod>2020-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-end-to-end-dysarthric-speech-recognition-via-meta-learning-based-model-re-initialization-2011.01686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-end-to-end-dysarthric-speech-recognition-via-meta-learning-based-model-re-initialization-2011.01686"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-incorporating-articulatory-movement-information-in-speech-enhancement-2011.01691</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-incorporating-articulatory-movement-information-in-speech-enhancement-2011.01691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-incorporating-articulatory-movement-information-in-speech-enhancement-2011.01691"/></url>
<url><loc>https://scifaro.com/en/abs/short-time-deep-learning-based-source-separation-for-speech-enhancement-in-reverberant-environments-with-beamforming-2011.01965</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/short-time-deep-learning-based-source-separation-for-speech-enhancement-in-reverberant-environments-with-beamforming-2011.01965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/short-time-deep-learning-based-source-separation-for-speech-enhancement-in-reverberant-environments-with-beamforming-2011.01965"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-pattern-discovery-from-thematic-speech-archives-based-on-multilingual-bottleneck-features-2011.01986</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-pattern-discovery-from-thematic-speech-archives-based-on-multilingual-bottleneck-features-2011.01986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-pattern-discovery-from-thematic-speech-archives-based-on-multilingual-bottleneck-features-2011.01986"/></url>
<url><loc>https://scifaro.com/en/abs/internal-language-model-estimation-for-domain-adaptive-end-to-end-speech-recognition-2011.01991</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/internal-language-model-estimation-for-domain-adaptive-end-to-end-speech-recognition-2011.01991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/internal-language-model-estimation-for-domain-adaptive-end-to-end-speech-recognition-2011.01991"/></url>
<url><loc>https://scifaro.com/en/abs/dover-lap-a-method-for-combining-overlap-aware-diarization-outputs-2011.01997</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dover-lap-a-method-for-combining-overlap-aware-diarization-outputs-2011.01997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dover-lap-a-method-for-combining-overlap-aware-diarization-outputs-2011.01997"/></url>
<url><loc>https://scifaro.com/en/abs/complex-ratio-masking-for-singing-voice-separation-2011.02008</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-ratio-masking-for-singing-voice-separation-2011.02008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-ratio-masking-for-singing-voice-separation-2011.02008"/></url>
<url><loc>https://scifaro.com/en/abs/integration-of-speech-separation-diarization-and-recognition-for-multi-speaker-meetings-system-description-comparison-and-analysis-2011.02014</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integration-of-speech-separation-diarization-and-recognition-for-multi-speaker-meetings-system-description-comparison-and-analysis-2011.02014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integration-of-speech-separation-diarization-and-recognition-for-multi-speaker-meetings-system-description-comparison-and-analysis-2011.02014"/></url>
<url><loc>https://scifaro.com/en/abs/frustratingly-easy-noise-aware-training-of-acoustic-models-2011.02090</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frustratingly-easy-noise-aware-training-of-acoustic-models-2011.02090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frustratingly-easy-noise-aware-training-of-acoustic-models-2011.02090"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speaker-extraction-network-based-on-iterative-refined-adaptation-2011.02102</loc><lastmod>2022-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speaker-extraction-network-based-on-iterative-refined-adaptation-2011.02102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speaker-extraction-network-based-on-iterative-refined-adaptation-2011.02102"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multi-task-network-for-delay-estimation-and-echo-cancellation-2011.02109</loc><lastmod>2022-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multi-task-network-for-delay-estimation-and-echo-cancellation-2011.02109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multi-task-network-for-delay-estimation-and-echo-cancellation-2011.02109"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-transformers-utterance-level-code-switching-detection-2011.02132</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-transformers-utterance-level-code-switching-detection-2011.02132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-transformers-utterance-level-code-switching-detection-2011.02132"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-representation-learning-for-speech-and-audio-signals-based-on-relevance-weighting-2011.02136</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-representation-learning-for-speech-and-audio-signals-based-on-relevance-weighting-2011.02136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-representation-learning-for-speech-and-audio-signals-based-on-relevance-weighting-2011.02136"/></url>
<url><loc>https://scifaro.com/en/abs/learning-in-your-voice-non-parallel-voice-conversion-based-on-speaker-consistency-loss-2011.02168</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-in-your-voice-non-parallel-voice-conversion-based-on-speaker-consistency-loss-2011.02168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-in-your-voice-non-parallel-voice-conversion-based-on-speaker-consistency-loss-2011.02168"/></url>
<url><loc>https://scifaro.com/en/abs/prosodic-representation-learning-and-contextual-sampling-for-neural-text-to-speech-2011.02252</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosodic-representation-learning-and-contextual-sampling-for-neural-text-to-speech-2011.02252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosodic-representation-learning-and-contextual-sampling-for-neural-text-to-speech-2011.02252"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-conditional-audio-filtering-of-arbitrary-sounds-2011.02421</loc><lastmod>2020-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-conditional-audio-filtering-of-arbitrary-sounds-2011.02421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-conditional-audio-filtering-of-arbitrary-sounds-2011.02421"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-channel-temporal-attention-convolutional-neural-network-model-for-environmental-sound-classification-2011.02561</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-channel-temporal-attention-convolutional-neural-network-model-for-environmental-sound-classification-2011.02561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-channel-temporal-attention-convolutional-neural-network-model-for-environmental-sound-classification-2011.02561"/></url>
<url><loc>https://scifaro.com/en/abs/don-t-look-back-an-online-beat-tracking-method-using-rnn-and-enhanced-particle-filtering-2011.02619</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/don-t-look-back-an-online-beat-tracking-method-using-rnn-and-enhanced-particle-filtering-2011.02619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/don-t-look-back-an-online-beat-tracking-method-using-rnn-and-enhanced-particle-filtering-2011.02619"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-study-on-infant-parent-voice-diarization-2011.02698</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-study-on-infant-parent-voice-diarization-2011.02698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-study-on-infant-parent-voice-diarization-2011.02698"/></url>
<url><loc>https://scifaro.com/en/abs/multi-accent-adaptation-based-on-gate-mechanism-2011.02774</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-accent-adaptation-based-on-gate-mechanism-2011.02774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-accent-adaptation-based-on-gate-mechanism-2011.02774"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-using-class-similarity-for-robust-speech-recognition-2011.02782</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-using-class-similarity-for-robust-speech-recognition-2011.02782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-using-class-similarity-for-robust-speech-recognition-2011.02782"/></url>
<url><loc>https://scifaro.com/en/abs/multi-class-spectral-clustering-with-overlaps-for-speaker-diarization-2011.02900</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-class-spectral-clustering-with-overlaps-for-speaker-diarization-2011.02900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-class-spectral-clustering-with-overlaps-for-speaker-diarization-2011.02900"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-bayes-risk-training-for-end-to-end-speaker-attributed-asr-2011.02921</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-bayes-risk-training-for-end-to-end-speaker-attributed-asr-2011.02921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-bayes-risk-training-for-end-to-end-speaker-attributed-asr-2011.02921"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-as-a-simple-binary-classification-problem-with-careful-selection-of-proxy-outlier-examples-2011.02949</loc><lastmod>2020-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-as-a-simple-binary-classification-problem-with-careful-selection-of-proxy-outlier-examples-2011.02949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-as-a-simple-binary-classification-problem-with-careful-selection-of-proxy-outlier-examples-2011.02949"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-end-to-end-multi-channel-asr-with-bias-information-for-meeting-transcription-2011.03110</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-end-to-end-multi-channel-asr-with-bias-information-for-meeting-transcription-2011.03110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-end-to-end-multi-channel-asr-with-bias-information-for-meeting-transcription-2011.03110"/></url>
<url><loc>https://scifaro.com/en/abs/a-hierarchical-subspace-model-for-language-attuned-acoustic-unit-discovery-2011.03115</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hierarchical-subspace-model-for-language-attuned-acoustic-unit-discovery-2011.03115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hierarchical-subspace-model-for-language-attuned-acoustic-unit-discovery-2011.03115"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-bottleneck-features-for-improving-asr-performance-of-code-switched-speech-in-under-resourced-languages-2011.03118</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-bottleneck-features-for-improving-asr-performance-of-code-switched-speech-in-under-resourced-languages-2011.03118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-bottleneck-features-for-improving-asr-performance-of-code-switched-speech-in-under-resourced-languages-2011.03118"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-from-contrastive-mixtures-for-personalized-speech-enhancement-2011.03426</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-from-contrastive-mixtures-for-personalized-speech-enhancement-2011.03426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-from-contrastive-mixtures-for-personalized-speech-enhancement-2011.03426"/></url>
<url><loc>https://scifaro.com/en/abs/misalignment-recognition-in-acoustic-sensor-networks-using-a-semi-supervised-source-estimation-method-and-markov-random-fields-2011.03432</loc><lastmod>2020-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/misalignment-recognition-in-acoustic-sensor-networks-using-a-semi-supervised-source-estimation-method-and-markov-random-fields-2011.03432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/misalignment-recognition-in-acoustic-sensor-networks-using-a-semi-supervised-source-estimation-method-and-markov-random-fields-2011.03432"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-se-end-to-end-speech-enhancement-and-separation-toolkit-designed-for-asr-integration-2011.03706</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-se-end-to-end-speech-enhancement-and-separation-toolkit-designed-for-asr-integration-2011.03706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-se-end-to-end-speech-enhancement-and-separation-toolkit-designed-for-asr-integration-2011.03706"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-by-postfiltering-for-speech-and-audio-coding-in-ad-hoc-sensor-networks-2011.03810</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-by-postfiltering-for-speech-and-audio-coding-in-ad-hoc-sensor-networks-2011.03810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-by-postfiltering-for-speech-and-audio-coding-in-ad-hoc-sensor-networks-2011.03810"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-style-modeling-transfer-and-prediction-in-text-to-speech-synthesis-via-phone-level-content-style-disentanglement-2011.03943</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-style-modeling-transfer-and-prediction-in-text-to-speech-synthesis-via-phone-level-content-style-disentanglement-2011.03943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-style-modeling-transfer-and-prediction-in-text-to-speech-synthesis-via-phone-level-content-style-disentanglement-2011.03943"/></url>
<url><loc>https://scifaro.com/en/abs/listen-look-and-deliberate-visual-context-aware-speech-recognition-using-pre-trained-text-video-representations-2011.04084</loc><lastmod>2020-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-look-and-deliberate-visual-context-aware-speech-recognition-using-pre-trained-text-video-representations-2011.04084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-look-and-deliberate-visual-context-aware-speech-recognition-using-pre-trained-text-video-representations-2011.04084"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-of-visual-features-for-dnn-based-audio-visual-speech-enhancement-in-multi-talker-environments-2011.04359</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-of-visual-features-for-dnn-based-audio-visual-speech-enhancement-in-multi-talker-environments-2011.04359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-of-visual-features-for-dnn-based-audio-visual-speech-enhancement-in-multi-talker-environments-2011.04359"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-training-data-generation-for-phase-based-doa-estimation-2011.04456</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-training-data-generation-for-phase-based-doa-estimation-2011.04456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-training-data-generation-for-phase-based-doa-estimation-2011.04456"/></url>
<url><loc>https://scifaro.com/en/abs/informed-source-extraction-with-application-to-acoustic-echo-reduction-2011.04569</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/informed-source-extraction-with-application-to-acoustic-echo-reduction-2011.04569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/informed-source-extraction-with-application-to-acoustic-echo-reduction-2011.04569"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-lf-mmi-ctc-and-rnn-t-criteria-for-streaming-asr-2011.04785</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-lf-mmi-ctc-and-rnn-t-criteria-for-streaming-asr-2011.04785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-lf-mmi-ctc-and-rnn-t-criteria-for-streaming-asr-2011.04785"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-text-independent-speaker-verification-based-on-the-ge2e-method-2011.04896</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-text-independent-speaker-verification-based-on-the-ge2e-method-2011.04896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-text-independent-speaker-verification-based-on-the-ge2e-method-2011.04896"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-low-quality-voice-recordings-using-disentangled-channel-factor-and-neural-waveform-model-2011.05038</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-low-quality-voice-recordings-using-disentangled-channel-factor-and-neural-waveform-model-2011.05038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-low-quality-voice-recordings-using-disentangled-channel-factor-and-neural-waveform-model-2011.05038"/></url>
<url><loc>https://scifaro.com/en/abs/improving-prosody-modelling-with-cross-utterance-bert-embeddings-for-end-to-end-speech-synthesis-2011.05161</loc><lastmod>2020-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-prosody-modelling-with-cross-utterance-bert-embeddings-for-end-to-end-speech-synthesis-2011.05161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-prosody-modelling-with-cross-utterance-bert-embeddings-for-end-to-end-speech-synthesis-2011.05161"/></url>
<url><loc>https://scifaro.com/en/abs/surrogate-source-model-learning-for-determined-source-separation-2011.05540</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surrogate-source-model-learning-for-determined-source-separation-2011.05540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surrogate-source-model-learning-for-determined-source-separation-2011.05540"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-neural-architecture-search-for-end-to-end-speech-recognition-via-straight-through-gradients-2011.05649</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-neural-architecture-search-for-end-to-end-speech-recognition-via-straight-through-gradients-2011.05649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-neural-architecture-search-for-end-to-end-speech-recognition-via-straight-through-gradients-2011.05649"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-expressive-text-to-speech-using-data-augmentation-2011.05707</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-expressive-text-to-speech-using-data-augmentation-2011.05707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-expressive-text-to-speech-using-data-augmentation-2011.05707"/></url>
<url><loc>https://scifaro.com/en/abs/fastsvc-fast-cross-domain-singing-voice-conversion-with-feature-wise-linear-modulation-2011.05731</loc><lastmod>2021-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastsvc-fast-cross-domain-singing-voice-conversion-with-feature-wise-linear-modulation-2011.05731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastsvc-fast-cross-domain-singing-voice-conversion-with-feature-wise-linear-modulation-2011.05731"/></url>
<url><loc>https://scifaro.com/en/abs/on-end-to-end-multi-channel-time-domain-speech-separation-in-reverberant-environments-2011.05958</loc><lastmod>2020-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-end-to-end-multi-channel-time-domain-speech-separation-in-reverberant-environments-2011.05958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-end-to-end-multi-channel-time-domain-speech-separation-in-reverberant-environments-2011.05958"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-knowledge-distillation-for-rnn-transducer-models-2011.06110</loc><lastmod>2020-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-knowledge-distillation-for-rnn-transducer-models-2011.06110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-knowledge-distillation-for-rnn-transducer-models-2011.06110"/></url>
<url><loc>https://scifaro.com/en/abs/the-cuhk-tudelft-system-for-the-slt-2021-children-speech-recognition-challenge-2011.06239</loc><lastmod>2020-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-cuhk-tudelft-system-for-the-slt-2021-children-speech-recognition-challenge-2011.06239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-cuhk-tudelft-system-for-the-slt-2021-children-speech-recognition-challenge-2011.06239"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-prosody-modeling-for-non-autoregressive-speech-synthesis-2011.06465</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-prosody-modeling-for-non-autoregressive-speech-synthesis-2011.06465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-prosody-modeling-for-non-autoregressive-speech-synthesis-2011.06465"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-the-intelligibility-benefits-of-neural-speech-enrichment-for-listeners-with-normal-hearing-and-hearing-impairment-using-the-greek-harvard-corpus-2011.06548</loc><lastmod>2020-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-the-intelligibility-benefits-of-neural-speech-enrichment-for-listeners-with-normal-hearing-and-hearing-impairment-using-the-greek-harvard-corpus-2011.06548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-the-intelligibility-benefits-of-neural-speech-enrichment-for-listeners-with-normal-hearing-and-hearing-impairment-using-the-greek-harvard-corpus-2011.06548"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-dilated-cnn-models-for-depression-detection-using-inverted-vocal-tract-variables-2011.06739</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-dilated-cnn-models-for-depression-detection-using-inverted-vocal-tract-variables-2011.06739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-dilated-cnn-models-for-depression-detection-using-inverted-vocal-tract-variables-2011.06739"/></url>
<url><loc>https://scifaro.com/en/abs/on-filter-generalization-for-music-bandwidth-extension-using-deep-neural-networks-2011.07274</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-filter-generalization-for-music-bandwidth-extension-using-deep-neural-networks-2011.07274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-filter-generalization-for-music-bandwidth-extension-using-deep-neural-networks-2011.07274"/></url>
<url><loc>https://scifaro.com/en/abs/distortion-controlled-training-for-end-to-end-reverberant-speech-separation-with-auxiliary-autoencoding-loss-2011.07338</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distortion-controlled-training-for-end-to-end-reverberant-speech-separation-with-auxiliary-autoencoding-loss-2011.07338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distortion-controlled-training-for-end-to-end-reverberant-speech-separation-with-auxiliary-autoencoding-loss-2011.07338"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-dysarthric-speech-detection-exploiting-pairwise-distance-based-convolutional-neural-networks-2011.07545</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-dysarthric-speech-detection-exploiting-pairwise-distance-based-convolutional-neural-networks-2011.07545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-dysarthric-speech-detection-exploiting-pairwise-distance-based-convolutional-neural-networks-2011.07545"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-single-channel-speech-enhancement-using-speech-presence-probability-as-a-secondary-task-training-target-2011.07547</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-single-channel-speech-enhancement-using-speech-presence-probability-as-a-secondary-task-training-target-2011.07547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-single-channel-speech-enhancement-using-speech-presence-probability-as-a-secondary-task-training-target-2011.07547"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-multi-channel-integration-and-recognition-of-overlapped-speech-2011.07755</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-multi-channel-integration-and-recognition-of-overlapped-speech-2011.07755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-multi-channel-integration-and-recognition-of-overlapped-speech-2011.07755"/></url>
<url><loc>https://scifaro.com/en/abs/block-online-guided-source-separation-2011.07791</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/block-online-guided-source-separation-2011.07791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/block-online-guided-source-separation-2011.07791"/></url>
<url><loc>https://scifaro.com/en/abs/a-general-network-architecture-for-sound-event-localization-and-detection-using-transfer-learning-and-recurrent-neural-network-2011.07859</loc><lastmod>2020-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-general-network-architecture-for-sound-event-localization-and-detection-using-transfer-learning-and-recurrent-neural-network-2011.07859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-general-network-architecture-for-sound-event-localization-and-detection-using-transfer-learning-and-recurrent-neural-network-2011.07859"/></url>
<url><loc>https://scifaro.com/en/abs/refining-automatic-speech-recognition-system-for-older-adults-2011.08346</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refining-automatic-speech-recognition-system-for-older-adults-2011.08346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refining-automatic-speech-recognition-system-for-older-adults-2011.08346"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-lightweight-speech-separation-via-group-communication-2011.08397</loc><lastmod>2020-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-lightweight-speech-separation-via-group-communication-2011.08397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-lightweight-speech-separation-via-group-communication-2011.08397"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-the-separation-layers-in-speech-separation-networks-2011.08400</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-the-separation-layers-in-speech-separation-networks-2011.08400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-the-separation-layers-in-speech-separation-networks-2011.08400"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-filter-and-sum-network-for-multi-channel-speech-separation-2011.08401</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-filter-and-sum-network-for-multi-channel-speech-separation-2011.08401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-filter-and-sum-network-for-multi-channel-speech-separation-2011.08401"/></url>
<url><loc>https://scifaro.com/en/abs/s-transformer-segment-transformer-for-robust-neural-speech-synthesis-2011.08480</loc><lastmod>2020-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s-transformer-segment-transformer-for-robust-neural-speech-synthesis-2011.08480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s-transformer-segment-transformer-for-robust-neural-speech-synthesis-2011.08480"/></url>
<url><loc>https://scifaro.com/en/abs/tie-your-embeddings-down-cross-modal-latent-spaces-for-end-to-end-spoken-language-understanding-2011.09044</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tie-your-embeddings-down-cross-modal-latent-spaces-for-end-to-end-spoken-language-understanding-2011.09044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tie-your-embeddings-down-cross-modal-latent-spaces-for-end-to-end-spoken-language-understanding-2011.09044"/></url>
<url><loc>https://scifaro.com/en/abs/wpd-an-improved-neural-beamformer-for-simultaneous-speech-separation-and-dereverberation-2011.09162</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wpd-an-improved-neural-beamformer-for-simultaneous-speech-separation-and-dereverberation-2011.09162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wpd-an-improved-neural-beamformer-for-simultaneous-speech-separation-and-dereverberation-2011.09162"/></url>
<url><loc>https://scifaro.com/en/abs/respiratory-distress-detection-from-telephone-speech-using-acoustic-and-prosodic-features-2011.09270</loc><lastmod>2020-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/respiratory-distress-detection-from-telephone-speech-using-acoustic-and-prosodic-features-2011.09270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/respiratory-distress-detection-from-telephone-speech-using-acoustic-and-prosodic-features-2011.09270"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stage-speaker-extraction-with-utterance-and-frame-level-reference-signals-2011.09624</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stage-speaker-extraction-with-utterance-and-frame-level-reference-signals-2011.09624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stage-speaker-extraction-with-utterance-and-frame-level-reference-signals-2011.09624"/></url>
<url><loc>https://scifaro.com/en/abs/universal-melgan-a-robust-neural-vocoder-for-high-fidelity-waveform-generation-in-multiple-domains-2011.09631</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-melgan-a-robust-neural-vocoder-for-high-fidelity-waveform-generation-in-multiple-domains-2011.09631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-melgan-a-robust-neural-vocoder-for-high-fidelity-waveform-generation-in-multiple-domains-2011.09631"/></url>
<url><loc>https://scifaro.com/en/abs/tal-a-synchronised-multi-speaker-corpus-of-ultrasound-tongue-imaging-audio-and-lip-videos-2011.09804</loc><lastmod>2020-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tal-a-synchronised-multi-speaker-corpus-of-ultrasound-tongue-imaging-audio-and-lip-videos-2011.09804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tal-a-synchronised-multi-speaker-corpus-of-ultrasound-tongue-imaging-audio-and-lip-videos-2011.09804"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-single-microphone-speech-enhancement-2011.10345</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-single-microphone-speech-enhancement-2011.10345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-single-microphone-speech-enhancement-2011.10345"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-neural-affinity-score-fusion-2011.10527</loc><lastmod>2020-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-neural-affinity-score-fusion-2011.10527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-neural-affinity-score-fusion-2011.10527"/></url>
<url><loc>https://scifaro.com/en/abs/improving-rnn-t-asr-accuracy-using-context-audio-2011.10538</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-rnn-t-asr-accuracy-using-context-audio-2011.10538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-rnn-t-asr-accuracy-using-context-audio-2011.10538"/></url>
<url><loc>https://scifaro.com/en/abs/speech-denoising-with-auditory-models-2011.10706</loc><lastmod>2025-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-denoising-with-auditory-models-2011.10706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-denoising-with-auditory-models-2011.10706"/></url>
<url><loc>https://scifaro.com/en/abs/a-better-and-faster-end-to-end-model-for-streaming-asr-2011.10798</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-better-and-faster-end-to-end-model-for-streaming-asr-2011.10798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-better-and-faster-end-to-end-model-for-streaming-asr-2011.10798"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-silent-speech-recognition-with-acoustic-sensing-2011.11315</loc><lastmod>2020-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-silent-speech-recognition-with-acoustic-sensing-2011.11315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-silent-speech-recognition-with-acoustic-sensing-2011.11315"/></url>
<url><loc>https://scifaro.com/en/abs/using-synthetic-audio-to-improve-the-recognition-of-out-of-vocabulary-words-in-end-to-end-asr-systems-2011.11564</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-synthetic-audio-to-improve-the-recognition-of-out-of-vocabulary-words-in-end-to-end-asr-systems-2011.11564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-synthetic-audio-to-improve-the-recognition-of-out-of-vocabulary-words-in-end-to-end-asr-systems-2011.11564"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-multi-speaker-asr-with-rnn-t-2011.11671</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-multi-speaker-asr-with-rnn-t-2011.11671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-multi-speaker-asr-with-rnn-t-2011.11671"/></url>
<url><loc>https://scifaro.com/en/abs/synth2aug-cross-domain-speaker-recognition-with-tts-synthesized-speech-2011.11818</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synth2aug-cross-domain-speaker-recognition-with-tts-synthesized-speech-2011.11818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synth2aug-cross-domain-speaker-recognition-with-tts-synthesized-speech-2011.11818"/></url>
<url><loc>https://scifaro.com/en/abs/integration-of-variational-autoencoder-and-spatial-clustering-for-adaptive-multi-channel-neural-speech-separation-2011.11984</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integration-of-variational-autoencoder-and-spatial-clustering-for-adaptive-multi-channel-neural-speech-separation-2011.11984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integration-of-variational-autoencoder-and-spatial-clustering-for-adaptive-multi-channel-neural-speech-separation-2011.11984"/></url>
<url><loc>https://scifaro.com/en/abs/how-far-are-we-from-robust-voice-conversion-a-survey-2011.12063</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-far-are-we-from-robust-voice-conversion-a-survey-2011.12063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-far-are-we-from-robust-voice-conversion-a-survey-2011.12063"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-classification-via-semantic-embeddings-2011.12133</loc><lastmod>2021-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-classification-via-semantic-embeddings-2011.12133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-classification-via-semantic-embeddings-2011.12133"/></url>
<url><loc>https://scifaro.com/en/abs/tfgan-time-and-frequency-domain-based-generative-adversarial-network-for-high-fidelity-speech-synthesis-2011.12206</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tfgan-time-and-frequency-domain-based-generative-adversarial-network-for-high-fidelity-speech-synthesis-2011.12206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tfgan-time-and-frequency-domain-based-generative-adversarial-network-for-high-fidelity-speech-synthesis-2011.12206"/></url>
<url><loc>https://scifaro.com/en/abs/a-light-transformer-for-speech-to-intent-applications-2011.12221</loc><lastmod>2020-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-light-transformer-for-speech-to-intent-applications-2011.12221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-light-transformer-for-speech-to-intent-applications-2011.12221"/></url>
<url><loc>https://scifaro.com/en/abs/soft-median-choice-an-automatic-feature-smoothing-method-for-sound-event-detection-2011.12564</loc><lastmod>2021-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-median-choice-an-automatic-feature-smoothing-method-for-sound-event-detection-2011.12564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-median-choice-an-automatic-feature-smoothing-method-for-sound-event-detection-2011.12564"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-classification-with-factored-linear-and-nonlinear-acoustic-semantic-projections-2011.12657</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-classification-with-factored-linear-and-nonlinear-acoustic-semantic-projections-2011.12657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-classification-with-factored-linear-and-nonlinear-acoustic-semantic-projections-2011.12657"/></url>
<url><loc>https://scifaro.com/en/abs/bootstrap-an-end-to-end-asr-system-by-multilingual-training-transfer-learning-text-to-text-mapping-and-synthetic-audio-2011.12696</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bootstrap-an-end-to-end-asr-system-by-multilingual-training-transfer-learning-text-to-text-mapping-and-synthetic-audio-2011.12696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bootstrap-an-end-to-end-asr-system-by-multilingual-training-transfer-learning-text-to-text-mapping-and-synthetic-audio-2011.12696"/></url>
<url><loc>https://scifaro.com/en/abs/small-footprint-convolutional-recurrent-networks-for-streaming-wakeword-detection-2011.12941</loc><lastmod>2020-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-footprint-convolutional-recurrent-networks-for-streaming-wakeword-detection-2011.12941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-footprint-convolutional-recurrent-networks-for-streaming-wakeword-detection-2011.12941"/></url>
<url><loc>https://scifaro.com/en/abs/voxlingua107-a-dataset-for-spoken-language-recognition-2011.12998</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxlingua107-a-dataset-for-spoken-language-recognition-2011.12998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxlingua107-a-dataset-for-spoken-language-recognition-2011.12998"/></url>
<url><loc>https://scifaro.com/en/abs/multi-quartznet-multi-resolution-convolution-for-speech-recognition-with-multi-layer-feature-fusion-2011.13090</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-quartznet-multi-resolution-convolution-for-speech-recognition-with-multi-layer-feature-fusion-2011.13090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-quartznet-multi-resolution-convolution-for-speech-recognition-with-multi-layer-feature-fusion-2011.13090"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-online-speech-recognition-with-decoder-end-adaptive-computation-steps-2011.13834</loc><lastmod>2020-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-online-speech-recognition-with-decoder-end-adaptive-computation-steps-2011.13834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-online-speech-recognition-with-decoder-end-adaptive-computation-steps-2011.13834"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-on-untranscribed-speech-2011.14060</loc><lastmod>2020-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-on-untranscribed-speech-2011.14060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-on-untranscribed-speech-2011.14060"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-based-on-re-clustering-of-hypothesized-speech-segments-with-siamese-and-triplet-networks-2011.14062</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-based-on-re-clustering-of-hypothesized-speech-segments-with-siamese-and-triplet-networks-2011.14062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-spoken-term-discovery-based-on-re-clustering-of-hypothesized-speech-segments-with-siamese-and-triplet-networks-2011.14062"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-expressions-with-multimodal-transformers-2012.00063</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-expressions-with-multimodal-transformers-2012.00063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-expressions-with-multimodal-transformers-2012.00063"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-deep-speaker-embedding-framework-for-mixed-bandwidth-speech-data-2012.00486</loc><lastmod>2020-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-deep-speaker-embedding-framework-for-mixed-bandwidth-speech-data-2012.00486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-deep-speaker-embedding-framework-for-mixed-bandwidth-speech-data-2012.00486"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-based-on-deep-learning-an-overview-2012.00931</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-based-on-deep-learning-an-overview-2012.00931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-based-on-deep-learning-an-overview-2012.00931"/></url>
<url><loc>https://scifaro.com/en/abs/the-third-dihard-diarization-challenge-2012.01477</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-third-dihard-diarization-challenge-2012.01477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-third-dihard-diarization-challenge-2012.01477"/></url>
<url><loc>https://scifaro.com/en/abs/joint-gender-and-age-estimation-based-on-speech-signals-using-x-vectors-and-transfer-learning-2012.01551</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-gender-and-age-estimation-based-on-speech-signals-using-x-vectors-and-transfer-learning-2012.01551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-gender-and-age-estimation-based-on-speech-signals-using-x-vectors-and-transfer-learning-2012.01551"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-few-shot-audio-classification-2012.01573</loc><lastmod>2020-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-few-shot-audio-classification-2012.01573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-few-shot-audio-classification-2012.01573"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-for-the-hearing-impaired-2012.02174</loc><lastmod>2021-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-for-the-hearing-impaired-2012.02174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-for-the-hearing-impaired-2012.02174"/></url>
<url><loc>https://scifaro.com/en/abs/a-correspondence-variational-autoencoder-for-unsupervised-acoustic-word-embeddings-2012.02221</loc><lastmod>2020-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-correspondence-variational-autoencoder-for-unsupervised-acoustic-word-embeddings-2012.02221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-correspondence-variational-autoencoder-for-unsupervised-acoustic-word-embeddings-2012.02221"/></url>
<url><loc>https://scifaro.com/en/abs/graphpb-graphical-representations-of-prosody-boundary-in-speech-synthesis-2012.02626</loc><lastmod>2020-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graphpb-graphical-representations-of-prosody-boundary-in-speech-synthesis-2012.02626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graphpb-graphical-representations-of-prosody-boundary-in-speech-synthesis-2012.02626"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-based-spoofing-robust-automatic-speaker-verification-system-2012.03154</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-based-spoofing-robust-automatic-speaker-verification-system-2012.03154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-based-spoofing-robust-automatic-speaker-verification-system-2012.03154"/></url>
<url><loc>https://scifaro.com/en/abs/mls-a-large-scale-multilingual-dataset-for-speech-research-2012.03411</loc><lastmod>2020-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mls-a-large-scale-multilingual-dataset-for-speech-research-2012.03411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mls-a-large-scale-multilingual-dataset-for-speech-research-2012.03411"/></url>
<url><loc>https://scifaro.com/en/abs/efficienttts-an-efficient-and-high-quality-text-to-speech-architecture-2012.03500</loc><lastmod>2020-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficienttts-an-efficient-and-high-quality-text-to-speech-architecture-2012.03500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficienttts-an-efficient-and-high-quality-text-to-speech-architecture-2012.03500"/></url>
<url><loc>https://scifaro.com/en/abs/towards-speech-enhancement-using-a-variational-u-net-architecture-2012.03594</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-speech-enhancement-using-a-variational-u-net-architecture-2012.03594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-speech-enhancement-using-a-variational-u-net-architecture-2012.03594"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-the-effects-of-dynamic-range-compression-on-signals-in-noise-2012.03860</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-the-effects-of-dynamic-range-compression-on-signals-in-noise-2012.03860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-the-effects-of-dynamic-range-compression-on-signals-in-noise-2012.03860"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-disentanglement-of-speaker-representation-for-attribute-driven-privacy-preservation-2012.04454</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-disentanglement-of-speaker-representation-for-attribute-driven-privacy-preservation-2012.04454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-disentanglement-of-speaker-representation-for-attribute-driven-privacy-preservation-2012.04454"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-learning-of-lf-mmi-trained-time-delay-neural-networks-for-speech-recognition-2012.04494</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-learning-of-lf-mmi-trained-time-delay-neural-networks-for-speech-recognition-2012.04494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-learning-of-lf-mmi-trained-time-delay-neural-networks-for-speech-recognition-2012.04494"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-communications-for-speech-signals-2012.05369</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-communications-for-speech-signals-2012.05369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-communications-for-speech-signals-2012.05369"/></url>
<url><loc>https://scifaro.com/en/abs/learning-multiple-sound-source-2d-localization-2012.05515</loc><lastmod>2020-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-multiple-sound-source-2d-localization-2012.05515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-multiple-sound-source-2d-localization-2012.05515"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-framework-for-real-world-multiple-sound-source-2d-localization-2012.05533</loc><lastmod>2021-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-framework-for-real-world-multiple-sound-source-2d-localization-2012.05533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-framework-for-real-world-multiple-sound-source-2d-localization-2012.05533"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-of-discriminators-for-domain-adaptation-in-multiple-sound-source-2d-localization-2012.05908</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-of-discriminators-for-domain-adaptation-in-multiple-sound-source-2d-localization-2012.05908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-of-discriminators-for-domain-adaptation-in-multiple-sound-source-2d-localization-2012.05908"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-geometry-calibration-from-distance-estimates-for-wireless-acoustic-sensor-networks-2012.06142</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-geometry-calibration-from-distance-estimates-for-wireless-acoustic-sensor-networks-2012.06142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-geometry-calibration-from-distance-estimates-for-wireless-acoustic-sensor-networks-2012.06142"/></url>
<url><loc>https://scifaro.com/en/abs/blind-monaural-source-separation-on-heart-and-lung-sounds-based-on-periodic-coded-deep-autoencoder-2012.06275</loc><lastmod>2020-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-monaural-source-separation-on-heart-and-lung-sounds-based-on-periodic-coded-deep-autoencoder-2012.06275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-monaural-source-separation-on-heart-and-lung-sounds-based-on-periodic-coded-deep-autoencoder-2012.06275"/></url>
<url><loc>https://scifaro.com/en/abs/decoar-2-0-deep-contextualized-acoustic-representations-with-vector-quantization-2012.06659</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoar-2-0-deep-contextualized-acoustic-representations-with-vector-quantization-2012.06659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoar-2-0-deep-contextualized-acoustic-representations-with-vector-quantization-2012.06659"/></url>
<url><loc>https://scifaro.com/en/abs/deaan-disentangled-embedding-and-adversarial-adaptation-network-for-robust-speaker-representation-learning-2012.06896</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deaan-disentangled-embedding-and-adversarial-adaptation-network-for-robust-speaker-representation-learning-2012.06896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deaan-disentangled-embedding-and-adversarial-adaptation-network-for-robust-speaker-representation-learning-2012.06896"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-text-independent-speaker-verification-using-prototypical-momentum-contrastive-learning-2012.07178</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-text-independent-speaker-verification-using-prototypical-momentum-contrastive-learning-2012.07178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-text-independent-speaker-verification-using-prototypical-momentum-contrastive-learning-2012.07178"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-adaptive-normalization-driven-multi-speaker-speech-synthesis-2012.07252</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-adaptive-normalization-driven-multi-speaker-speech-synthesis-2012.07252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-adaptive-normalization-driven-multi-speaker-speech-synthesis-2012.07252"/></url>
<url><loc>https://scifaro.com/en/abs/multi-spectrogan-high-diversity-and-high-fidelity-spectrogram-generation-with-adversarial-style-combination-for-speech-synthesis-2012.07267</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-spectrogan-high-diversity-and-high-fidelity-spectrogram-generation-with-adversarial-style-combination-for-speech-synthesis-2012.07267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-spectrogan-high-diversity-and-high-fidelity-spectrogram-generation-with-adversarial-style-combination-for-speech-synthesis-2012.07267"/></url>
<url><loc>https://scifaro.com/en/abs/group-communication-with-context-codec-for-lightweight-source-separation-2012.07291</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/group-communication-with-context-codec-for-lightweight-source-separation-2012.07291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/group-communication-with-context-codec-for-lightweight-source-separation-2012.07291"/></url>
<url><loc>https://scifaro.com/en/abs/audio-captioning-using-pre-trained-large-scale-language-model-guided-by-audio-based-similar-caption-retrieval-2012.07331</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-captioning-using-pre-trained-large-scale-language-model-guided-by-audio-based-similar-caption-retrieval-2012.07331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-captioning-using-pre-trained-large-scale-language-model-guided-by-audio-based-similar-caption-retrieval-2012.07331"/></url>
<url><loc>https://scifaro.com/en/abs/redat-accent-invariant-representation-for-end-to-end-asr-by-domain-adversarial-training-with-relabeling-2012.07353</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/redat-accent-invariant-representation-for-end-to-end-asr-by-domain-adversarial-training-with-relabeling-2012.07353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/redat-accent-invariant-representation-for-end-to-end-asr-by-domain-adversarial-training-with-relabeling-2012.07353"/></url>
<url><loc>https://scifaro.com/en/abs/av-taris-online-audio-visual-speech-recognition-2012.07467</loc><lastmod>2020-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-taris-online-audio-visual-speech-recognition-2012.07467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-taris-online-audio-visual-speech-recognition-2012.07467"/></url>
<url><loc>https://scifaro.com/en/abs/f0-based-gammatone-filtering-for-intelligibility-gain-of-acoustic-noisy-signals-2012.08227</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f0-based-gammatone-filtering-for-intelligibility-gain-of-acoustic-noisy-signals-2012.08227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f0-based-gammatone-filtering-for-intelligibility-gain-of-acoustic-noisy-signals-2012.08227"/></url>
<url><loc>https://scifaro.com/en/abs/a-synergistic-kalman-and-deep-postfiltering-approach-to-acoustic-echo-cancellation-2012.08867</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-synergistic-kalman-and-deep-postfiltering-approach-to-acoustic-echo-cancellation-2012.08867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-synergistic-kalman-and-deep-postfiltering-approach-to-acoustic-echo-cancellation-2012.08867"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-zero-shot-model-selection-2012.09359</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-zero-shot-model-selection-2012.09359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-zero-shot-model-selection-2012.09359"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-speech-and-noise-modeling-for-speech-enhancement-2012.09408</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-speech-and-noise-modeling-for-speech-enhancement-2012.09408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-speech-and-noise-modeling-for-speech-enhancement-2012.09408"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-steered-response-power-mapping-based-on-nyquist-shannon-sampling-2012.09499</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-steered-response-power-mapping-based-on-nyquist-shannon-sampling-2012.09499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-steered-response-power-mapping-based-on-nyquist-shannon-sampling-2012.09499"/></url>
<url><loc>https://scifaro.com/en/abs/the-effectiveness-of-unsupervised-subword-modeling-with-autoregressive-and-cross-lingual-phone-aware-networks-2012.09544</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effectiveness-of-unsupervised-subword-modeling-with-autoregressive-and-cross-lingual-phone-aware-networks-2012.09544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effectiveness-of-unsupervised-subword-modeling-with-autoregressive-and-cross-lingual-phone-aware-networks-2012.09544"/></url>
<url><loc>https://scifaro.com/en/abs/denoispeech-denoising-text-to-speech-with-frame-level-noise-modeling-2012.09547</loc><lastmod>2020-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoispeech-denoising-text-to-speech-with-frame-level-noise-modeling-2012.09547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoispeech-denoising-text-to-speech-with-frame-level-noise-modeling-2012.09547"/></url>
<url><loc>https://scifaro.com/en/abs/parallel-wavenet-conditioned-on-vae-latent-vectors-2012.09703</loc><lastmod>2020-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parallel-wavenet-conditioned-on-vae-latent-vectors-2012.09703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parallel-wavenet-conditioned-on-vae-latent-vectors-2012.09703"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-separation-using-speaker-inventory-for-long-multi-talker-recording-2012.09727</loc><lastmod>2020-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-separation-using-speaker-inventory-for-long-multi-talker-recording-2012.09727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-separation-using-speaker-inventory-for-long-multi-talker-recording-2012.09727"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-diarization-as-post-processing-2012.10055</loc><lastmod>2020-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-as-post-processing-2012.10055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-as-post-processing-2012.10055"/></url>
<url><loc>https://scifaro.com/en/abs/toward-streaming-asr-with-non-autoregressive-insertion-based-model-2012.10128</loc><lastmod>2021-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-streaming-asr-with-non-autoregressive-insertion-based-model-2012.10128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-streaming-asr-with-non-autoregressive-insertion-based-model-2012.10128"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-dnns-for-keyword-spotting-using-neural-architecture-search-and-quantization-2012.10138</loc><lastmod>2020-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-dnns-for-keyword-spotting-using-neural-architecture-search-and-quantization-2012.10138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-dnns-for-keyword-spotting-using-neural-architecture-search-and-quantization-2012.10138"/></url>
<url><loc>https://scifaro.com/en/abs/dccrgan-deep-complex-convolution-recurrent-generator-adversarial-network-for-speech-enhancement-2012.10732</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dccrgan-deep-complex-convolution-recurrent-generator-adversarial-network-for-speech-enhancement-2012.10732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dccrgan-deep-complex-convolution-recurrent-generator-adversarial-network-for-speech-enhancement-2012.10732"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-domainadversarial-neural-network-2012.11174</loc><lastmod>2020-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-domainadversarial-neural-network-2012.11174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-domainadversarial-neural-network-2012.11174"/></url>
<url><loc>https://scifaro.com/en/abs/the-2020-espnet-update-new-features-broadened-applications-performance-improvements-and-future-plans-2012.13006</loc><lastmod>2020-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-2020-espnet-update-new-features-broadened-applications-performance-improvements-and-future-plans-2012.13006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-2020-espnet-update-new-features-broadened-applications-performance-improvements-and-future-plans-2012.13006"/></url>
<url><loc>https://scifaro.com/en/abs/wheel-rail-interface-condition-estimation-w-rice-2012.13096</loc><lastmod>2020-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wheel-rail-interface-condition-estimation-w-rice-2012.13096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wheel-rail-interface-condition-estimation-w-rice-2012.13096"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-multi-frame-adl-mvdr-for-target-speech-separation-2012.13442</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-multi-frame-adl-mvdr-for-target-speech-separation-2012.13442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-multi-frame-adl-mvdr-for-target-speech-separation-2012.13442"/></url>
<url><loc>https://scifaro.com/en/abs/building-multi-lingual-tts-using-cross-lingual-voice-conversion-2012.14039</loc><lastmod>2020-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-multi-lingual-tts-using-cross-lingual-voice-conversion-2012.14039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-multi-lingual-tts-using-cross-lingual-voice-conversion-2012.14039"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-lexical-stress-errors-in-non-native-l2-english-with-data-augmentation-and-attention-2012.14788</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-lexical-stress-errors-in-non-native-l2-english-with-data-augmentation-and-attention-2012.14788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-lexical-stress-errors-in-non-native-l2-english-with-data-augmentation-and-attention-2012.14788"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-hmm-clustering-of-x-vector-sequences-vbx-in-speaker-diarization-theory-implementation-and-analysis-on-standard-tasks-2012.14952</loc><lastmod>2021-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-hmm-clustering-of-x-vector-sequences-vbx-in-speaker-diarization-theory-implementation-and-analysis-on-standard-tasks-2012.14952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-hmm-clustering-of-x-vector-sequences-vbx-in-speaker-diarization-theory-implementation-and-analysis-on-standard-tasks-2012.14952"/></url>
<url><loc>https://scifaro.com/en/abs/audio-content-analysis-2101.00132</loc><lastmod>2021-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-content-analysis-2101.00132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-content-analysis-2101.00132"/></url>
<url><loc>https://scifaro.com/en/abs/the-2020-personalized-voice-trigger-challenge-open-database-evaluation-metrics-and-the-baseline-systems-2101.01935</loc><lastmod>2021-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-2020-personalized-voice-trigger-challenge-open-database-evaluation-metrics-and-the-baseline-systems-2101.01935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-2020-personalized-voice-trigger-challenge-open-database-evaluation-metrics-and-the-baseline-systems-2101.01935"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-multi-task-learning-for-speech-enhancement-and-speaker-identification-in-multi-speaker-dialogue-scenario-2101.02550</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-multi-task-learning-for-speech-enhancement-and-speaker-identification-in-multi-speaker-dialogue-scenario-2101.02550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-multi-task-learning-for-speech-enhancement-and-speaker-identification-in-multi-speaker-dialogue-scenario-2101.02550"/></url>
<url><loc>https://scifaro.com/en/abs/coupling-a-generative-model-with-a-discriminative-learning-framework-for-speaker-verification-2101.03329</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coupling-a-generative-model-with-a-discriminative-learning-framework-for-speaker-verification-2101.03329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coupling-a-generative-model-with-a-discriminative-learning-framework-for-speaker-verification-2101.03329"/></url>
<url><loc>https://scifaro.com/en/abs/smartajweed-automatic-recognition-of-arabic-quranic-recitation-rules-2101.04200</loc><lastmod>2021-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smartajweed-automatic-recognition-of-arabic-quranic-recitation-rules-2101.04200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smartajweed-automatic-recognition-of-arabic-quranic-recitation-rules-2101.04200"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimator-2101.04315</loc><lastmod>2021-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimator-2101.04315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimator-2101.04315"/></url>
<url><loc>https://scifaro.com/en/abs/learning-efficient-representations-for-keyword-spotting-with-triplet-loss-2101.04792</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-efficient-representations-for-keyword-spotting-with-triplet-loss-2101.04792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-efficient-representations-for-keyword-spotting-with-triplet-loss-2101.04792"/></url>
<url><loc>https://scifaro.com/en/abs/effective-low-cost-time-domain-audio-separation-using-globally-attentive-locally-recurrent-networks-2101.05014</loc><lastmod>2021-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-low-cost-time-domain-audio-separation-using-globally-attentive-locally-recurrent-networks-2101.05014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-low-cost-time-domain-audio-separation-using-globally-attentive-locally-recurrent-networks-2101.05014"/></url>
<url><loc>https://scifaro.com/en/abs/whispered-and-lombard-neural-speech-synthesis-2101.05313</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whispered-and-lombard-neural-speech-synthesis-2101.05313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whispered-and-lombard-neural-speech-synthesis-2101.05313"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-activity-driven-neural-speech-extraction-2101.05516</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-activity-driven-neural-speech-extraction-2101.05516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-activity-driven-neural-speech-extraction-2101.05516"/></url>
<url><loc>https://scifaro.com/en/abs/an-evaluation-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2101.05525</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-evaluation-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2101.05525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-evaluation-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2101.05525"/></url>
<url><loc>https://scifaro.com/en/abs/fast-offline-transformer-based-end-to-end-automatic-speech-recognition-for-real-world-applications-2101.05600</loc><lastmod>2021-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-offline-transformer-based-end-to-end-automatic-speech-recognition-for-real-world-applications-2101.05600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-offline-transformer-based-end-to-end-automatic-speech-recognition-for-real-world-applications-2101.05600"/></url>
<url><loc>https://scifaro.com/en/abs/emocat-language-agnostic-emotional-voice-conversion-2101.05695</loc><lastmod>2021-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emocat-language-agnostic-emotional-voice-conversion-2101.05695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emocat-language-agnostic-emotional-voice-conversion-2101.05695"/></url>
<url><loc>https://scifaro.com/en/abs/multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.05975</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.05975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.05975"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-the-frequency-of-occurrence-of-italian-phonemes-in-text-2101.06147</loc><lastmod>2021-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-the-frequency-of-occurrence-of-italian-phonemes-in-text-2101.06147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-the-frequency-of-occurrence-of-italian-phonemes-in-text-2101.06147"/></url>
<url><loc>https://scifaro.com/en/abs/amffcn-attentional-multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.06268</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amffcn-attentional-multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.06268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amffcn-attentional-multi-layer-feature-fusion-convolution-network-for-audio-visual-speech-enhancement-2101.06268"/></url>
<url><loc>https://scifaro.com/en/abs/mispronunciation-detection-in-non-native-l2-english-with-uncertainty-modeling-2101.06396</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mispronunciation-detection-in-non-native-l2-english-with-uncertainty-modeling-2101.06396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mispronunciation-detection-in-non-native-l2-english-with-uncertainty-modeling-2101.06396"/></url>
<url><loc>https://scifaro.com/en/abs/an-embedded-multichannel-sound-acquisition-system-for-drone-audition-2101.06795</loc><lastmod>2021-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-embedded-multichannel-sound-acquisition-system-for-drone-audition-2101.06795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-embedded-multichannel-sound-acquisition-system-for-drone-audition-2101.06795"/></url>
<url><loc>https://scifaro.com/en/abs/tiny-transducer-a-highly-efficient-speech-recognition-model-on-edge-devices-2101.06856</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tiny-transducer-a-highly-efficient-speech-recognition-model-on-edge-devices-2101.06856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tiny-transducer-a-highly-efficient-speech-recognition-model-on-edge-devices-2101.06856"/></url>
<url><loc>https://scifaro.com/en/abs/improved-parallel-wavegan-vocoder-with-perceptually-weighted-spectrogram-loss-2101.07412</loc><lastmod>2021-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-parallel-wavegan-vocoder-with-perceptually-weighted-spectrogram-loss-2101.07412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-parallel-wavegan-vocoder-with-perceptually-weighted-spectrogram-loss-2101.07412"/></url>
<url><loc>https://scifaro.com/en/abs/arabic-speech-recognition-by-end-to-end-modular-systems-and-human-2101.08454</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arabic-speech-recognition-by-end-to-end-modular-systems-and-human-2101.08454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arabic-speech-recognition-by-end-to-end-modular-systems-and-human-2101.08454"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-f0-modification-for-x-vector-based-speech-pseudonymization-across-gender-2101.08478</loc><lastmod>2021-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-f0-modification-for-x-vector-based-speech-pseudonymization-across-gender-2101.08478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-f0-modification-for-x-vector-based-speech-pseudonymization-across-gender-2101.08478"/></url>
<url><loc>https://scifaro.com/en/abs/noisy-target-training-a-training-strategy-for-dnn-based-speech-enhancement-without-clean-speech-2101.08625</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisy-target-training-a-training-strategy-for-dnn-based-speech-enhancement-without-clean-speech-2101.08625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisy-target-training-a-training-strategy-for-dnn-based-speech-enhancement-without-clean-speech-2101.08625"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-pre-processing-defenses-against-adversarial-attacks-on-state-of-the-art-speaker-recognition-systems-2101.08909</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-pre-processing-defenses-against-adversarial-attacks-on-state-of-the-art-speaker-recognition-systems-2101.08909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-pre-processing-defenses-against-adversarial-attacks-on-state-of-the-art-speaker-recognition-systems-2101.08909"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-the-tradeoffs-in-client-side-privacy-for-downstream-speech-tasks-2101.08919</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-the-tradeoffs-in-client-side-privacy-for-downstream-speech-tasks-2101.08919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-the-tradeoffs-in-client-side-privacy-for-downstream-speech-tasks-2101.08919"/></url>
<url><loc>https://scifaro.com/en/abs/towards-efficient-models-for-real-time-deep-noise-suppression-2101.09249</loc><lastmod>2021-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-efficient-models-for-real-time-deep-noise-suppression-2101.09249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-efficient-models-for-real-time-deep-noise-suppression-2101.09249"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-speaker-diarization-recent-advances-with-deep-learning-2101.09624</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-speaker-diarization-recent-advances-with-deep-learning-2101.09624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-speaker-diarization-recent-advances-with-deep-learning-2101.09624"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-and-unsupervised-approaches-for-controlling-narrow-lexical-focus-in-sequence-to-sequence-speech-synthesis-2101.09940</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-and-unsupervised-approaches-for-controlling-narrow-lexical-focus-in-sequence-to-sequence-speech-synthesis-2101.09940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-and-unsupervised-approaches-for-controlling-narrow-lexical-focus-in-sequence-to-sequence-speech-synthesis-2101.09940"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-end-to-end-asr-for-endangered-language-documentation-an-empirical-study-on-yolox-ochitl-mixtec-2101.10877</loc><lastmod>2021-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-end-to-end-asr-for-endangered-language-documentation-an-empirical-study-on-yolox-ochitl-mixtec-2101.10877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-end-to-end-asr-for-endangered-language-documentation-an-empirical-study-on-yolox-ochitl-mixtec-2101.10877"/></url>
<url><loc>https://scifaro.com/en/abs/low-power-audio-keyword-spotting-using-tsetlin-machines-2101.11336</loc><lastmod>2021-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-power-audio-keyword-spotting-using-tsetlin-machines-2101.11336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-power-audio-keyword-spotting-using-tsetlin-machines-2101.11336"/></url>
<url><loc>https://scifaro.com/en/abs/vote400-voide-of-the-elderly-400-hours-a-speech-dataset-to-study-voice-interface-for-elderly-care-2101.11469</loc><lastmod>2021-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vote400-voide-of-the-elderly-400-hours-a-speech-dataset-to-study-voice-interface-for-elderly-care-2101.11469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vote400-voide-of-the-elderly-400-hours-a-speech-dataset-to-study-voice-interface-for-elderly-care-2101.11469"/></url>
<url><loc>https://scifaro.com/en/abs/bcn2brno-asr-system-fusion-for-albayzin-2020-speech-to-text-challenge-2101.12729</loc><lastmod>2021-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bcn2brno-asr-system-fusion-for-albayzin-2020-speech-to-text-challenge-2101.12729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bcn2brno-asr-system-fusion-for-albayzin-2020-speech-to-text-challenge-2101.12729"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-for-wake-up-word-detection-in-voice-assistants-2101.12732</loc><lastmod>2021-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-for-wake-up-word-detection-in-voice-assistants-2101.12732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-for-wake-up-word-detection-in-voice-assistants-2101.12732"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-sound-event-detection-using-random-augmentation-and-consistency-regularization-2102.00154</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-sound-event-detection-using-random-augmentation-and-consistency-regularization-2102.00154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-sound-event-detection-using-random-augmentation-and-consistency-regularization-2102.00154"/></url>
<url><loc>https://scifaro.com/en/abs/adversarially-learning-disentangled-speech-representations-for-robust-multi-factor-voice-conversion-2102.00184</loc><lastmod>2021-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarially-learning-disentangled-speech-representations-for-robust-multi-factor-voice-conversion-2102.00184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarially-learning-disentangled-speech-representations-for-robust-multi-factor-voice-conversion-2102.00184"/></url>
<url><loc>https://scifaro.com/en/abs/directional-sparse-filtering-using-weighted-lehmer-mean-for-blind-separation-of-unbalanced-speech-mixtures-2102.00196</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-sparse-filtering-using-weighted-lehmer-mean-for-blind-separation-of-unbalanced-speech-mixtures-2102.00196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-sparse-filtering-using-weighted-lehmer-mean-for-blind-separation-of-unbalanced-speech-mixtures-2102.00196"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-the-intelligibility-of-cleft-lip-and-palate-speech-using-cycle-consistent-adversarial-networks-2102.00270</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-the-intelligibility-of-cleft-lip-and-palate-speech-using-cycle-consistent-adversarial-networks-2102.00270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-the-intelligibility-of-cleft-lip-and-palate-speech-using-cycle-consistent-adversarial-networks-2102.00270"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-language-identification-using-multi-head-self-attention-and-1d-convolutional-neural-networks-2102.00306</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-language-identification-using-multi-head-self-attention-and-1d-convolutional-neural-networks-2102.00306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-language-identification-using-multi-head-self-attention-and-1d-convolutional-neural-networks-2102.00306"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-bert-joint-language-modelling-of-phoneme-sequence-and-asr-transcript-2102.00804</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-bert-joint-language-modelling-of-phoneme-sequence-and-asr-transcript-2102.00804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-bert-joint-language-modelling-of-phoneme-sequence-and-asr-transcript-2102.00804"/></url>
<url><loc>https://scifaro.com/en/abs/on-scaling-contrastive-representations-for-low-resource-speech-recognition-2102.00850</loc><lastmod>2021-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-scaling-contrastive-representations-for-low-resource-speech-recognition-2102.00850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-scaling-contrastive-representations-for-low-resource-speech-recognition-2102.00850"/></url>
<url><loc>https://scifaro.com/en/abs/universal-neural-vocoding-with-parallel-wavenet-2102.01106</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-neural-vocoding-with-parallel-wavenet-2102.01106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-neural-vocoding-with-parallel-wavenet-2102.01106"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-attention-fusion-for-target-speaker-extraction-2102.01326</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-attention-fusion-for-target-speaker-extraction-2102.01326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-attention-fusion-for-target-speaker-extraction-2102.01326"/></url>
<url><loc>https://scifaro.com/en/abs/the-hitachi-jhu-dihard-iii-system-competitive-end-to-end-neural-diarization-and-x-vector-clustering-systems-combined-by-dover-lap-2102.01363</loc><lastmod>2021-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-hitachi-jhu-dihard-iii-system-competitive-end-to-end-neural-diarization-and-x-vector-clustering-systems-combined-by-dover-lap-2102.01363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-hitachi-jhu-dihard-iii-system-competitive-end-to-end-neural-diarization-and-x-vector-clustering-systems-combined-by-dover-lap-2102.01363"/></url>
<url><loc>https://scifaro.com/en/abs/internal-language-model-training-for-domain-adaptive-end-to-end-speech-recognition-2102.01380</loc><lastmod>2021-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/internal-language-model-training-for-domain-adaptive-end-to-end-speech-recognition-2102.01380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/internal-language-model-training-for-domain-adaptive-end-to-end-speech-recognition-2102.01380"/></url>
<url><loc>https://scifaro.com/en/abs/inference-of-the-selective-auditory-attention-using-sequential-lmmse-estimation-2102.01746</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inference-of-the-selective-auditory-attention-using-sequential-lmmse-estimation-2102.01746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inference-of-the-selective-auditory-attention-using-sequential-lmmse-estimation-2102.01746"/></url>
<url><loc>https://scifaro.com/en/abs/a-global-local-attention-framework-for-weakly-labelled-audio-tagging-2102.01931</loc><lastmod>2021-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-global-local-attention-framework-for-weakly-labelled-audio-tagging-2102.01931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-global-local-attention-framework-for-weakly-labelled-audio-tagging-2102.01931"/></url>
<url><loc>https://scifaro.com/en/abs/vsegan-visual-speech-enhancement-generative-adversarial-network-2102.02599</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vsegan-visual-speech-enhancement-generative-adversarial-network-2102.02599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vsegan-visual-speech-enhancement-generative-adversarial-network-2102.02599"/></url>
<url><loc>https://scifaro.com/en/abs/infant-cry-classification-with-graph-convolutional-networks-2102.02909</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infant-cry-classification-with-graph-convolutional-networks-2102.02909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infant-cry-classification-with-graph-convolutional-networks-2102.02909"/></url>
<url><loc>https://scifaro.com/en/abs/beam-guided-tasnet-an-iterative-speech-separation-framework-with-multi-channel-output-2102.02998</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beam-guided-tasnet-an-iterative-speech-separation-framework-with-multi-channel-output-2102.02998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beam-guided-tasnet-an-iterative-speech-separation-framework-with-multi-channel-output-2102.02998"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-microphone-clusters-in-acoustic-sensor-networks-using-unsupervised-federated-learning-2102.03109</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-microphone-clusters-in-acoustic-sensor-networks-using-unsupervised-federated-learning-2102.03109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-microphone-clusters-in-acoustic-sensor-networks-using-unsupervised-federated-learning-2102.03109"/></url>
<url><loc>https://scifaro.com/en/abs/lexical-and-syntactic-gemination-in-italian-consonants-does-a-geminate-italian-consonant-consist-of-a-repeated-or-a-strengthened-consonant-2102.03166</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lexical-and-syntactic-gemination-in-italian-consonants-does-a-geminate-italian-consonant-consist-of-a-repeated-or-a-strengthened-consonant-2102.03166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lexical-and-syntactic-gemination-in-italian-consonants-does-a-geminate-italian-consonant-consist-of-a-repeated-or-a-strengthened-consonant-2102.03166"/></url>
<url><loc>https://scifaro.com/en/abs/intermediate-loss-regularization-for-ctc-based-speech-recognition-2102.03216</loc><lastmod>2021-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intermediate-loss-regularization-for-ctc-based-speech-recognition-2102.03216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intermediate-loss-regularization-for-ctc-based-speech-recognition-2102.03216"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-in-urban-audio-with-single-and-multi-rate-pcen-2102.03468</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-in-urban-audio-with-single-and-multi-rate-pcen-2102.03468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-in-urban-audio-with-single-and-multi-rate-pcen-2102.03468"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-attribution-with-voice-profiles-by-graph-based-semi-supervised-learning-2102.03634</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-attribution-with-voice-profiles-by-graph-based-semi-supervised-learning-2102.03634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-attribution-with-voice-profiles-by-graph-based-semi-supervised-learning-2102.03634"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-duke-lenovo-system-description-for-the-third-dihard-speech-diarization-challenge-2102.03649</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-duke-lenovo-system-description-for-the-third-dihard-speech-diarization-challenge-2102.03649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-duke-lenovo-system-description-for-the-third-dihard-speech-diarization-challenge-2102.03649"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speech-extraction-with-spatial-information-and-multi-speaker-conditioning-mechanism-2102.03762</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speech-extraction-with-spatial-information-and-multi-speaker-conditioning-mechanism-2102.03762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speech-extraction-with-spatial-information-and-multi-speaker-conditioning-mechanism-2102.03762"/></url>
<url><loc>https://scifaro.com/en/abs/ema2s-an-end-to-end-multimodal-articulatory-to-speech-system-2102.03786</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ema2s-an-end-to-end-multimodal-articulatory-to-speech-system-2102.03786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ema2s-an-end-to-end-multimodal-articulatory-to-speech-system-2102.03786"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-channel-transformer-for-speech-recognition-2102.03951</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-channel-transformer-for-speech-recognition-2102.03951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-channel-transformer-for-speech-recognition-2102.03951"/></url>
<url><loc>https://scifaro.com/en/abs/non-linear-frequency-warping-using-constant-q-transformation-for-speech-emotion-recognition-2102.04029</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-linear-frequency-warping-using-constant-q-transformation-for-speech-emotion-recognition-2102.04029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-linear-frequency-warping-using-constant-q-transformation-for-speech-emotion-recognition-2102.04029"/></url>
<url><loc>https://scifaro.com/en/abs/switching-variational-auto-encoders-for-noise-agnostic-audio-visual-speech-enhancement-2102.04144</loc><lastmod>2021-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/switching-variational-auto-encoders-for-noise-agnostic-audio-visual-speech-enhancement-2102.04144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/switching-variational-auto-encoders-for-noise-agnostic-audio-visual-speech-enhancement-2102.04144"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-monaural-speech-enhancement-with-short-time-discrete-cosine-transform-2102.04629</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-monaural-speech-enhancement-with-short-time-discrete-cosine-transform-2102.04629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-monaural-speech-enhancement-with-short-time-discrete-cosine-transform-2102.04629"/></url>
<url><loc>https://scifaro.com/en/abs/independent-vector-extraction-for-fast-joint-blind-source-separation-and-dereverberation-2102.04696</loc><lastmod>2021-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-vector-extraction-for-fast-joint-blind-source-separation-and-dereverberation-2102.04696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-vector-extraction-for-fast-joint-blind-source-separation-and-dereverberation-2102.04696"/></url>
<url><loc>https://scifaro.com/en/abs/train-your-classifier-first-cascade-neural-networks-training-from-upper-layers-to-lower-layers-2102.04697</loc><lastmod>2021-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/train-your-classifier-first-cascade-neural-networks-training-from-upper-layers-to-lower-layers-2102.04697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/train-your-classifier-first-cascade-neural-networks-training-from-upper-layers-to-lower-layers-2102.04697"/></url>
<url><loc>https://scifaro.com/en/abs/cdpam-contrastive-learning-for-perceptual-audio-similarity-2102.05109</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cdpam-contrastive-learning-for-perceptual-audio-similarity-2102.05109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cdpam-contrastive-learning-for-perceptual-audio-similarity-2102.05109"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-real-time-joint-neural-echo-control-and-speech-enhancement-based-on-percepnet-2102.05245</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-real-time-joint-neural-echo-control-and-speech-enhancement-based-on-percepnet-2102.05245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-real-time-joint-neural-echo-control-and-speech-enhancement-based-on-percepnet-2102.05245"/></url>
<url><loc>https://scifaro.com/en/abs/vace-wpe-virtual-acoustic-channel-expansion-based-on-neural-networks-for-weighted-prediction-error-based-speech-dereverberation-2102.05259</loc><lastmod>2021-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vace-wpe-virtual-acoustic-channel-expansion-based-on-neural-networks-for-weighted-prediction-error-based-speech-dereverberation-2102.05259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vace-wpe-virtual-acoustic-channel-expansion-based-on-neural-networks-for-weighted-prediction-error-based-speech-dereverberation-2102.05259"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2019-spoofing-countermeasures-for-the-detection-of-synthesized-converted-and-replayed-speech-2102.05889</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2019-spoofing-countermeasures-for-the-detection-of-synthesized-converted-and-replayed-speech-2102.05889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2019-spoofing-countermeasures-for-the-detection-of-synthesized-converted-and-replayed-speech-2102.05889"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-neural-networks-for-real-time-modeling-of-analog-dynamic-range-compression-2102.06200</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-neural-networks-for-real-time-modeling-of-analog-dynamic-range-compression-2102.06200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-neural-networks-for-real-time-modeling-of-analog-dynamic-range-compression-2102.06200"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-end-to-end-models-for-robust-speech-recognition-2102.06237</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-end-to-end-models-for-robust-speech-recognition-2102.06237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-end-to-end-models-for-robust-speech-recognition-2102.06237"/></url>
<url><loc>https://scifaro.com/en/abs/deepf0-end-to-end-fundamental-frequency-estimation-for-music-and-speech-signals-2102.06306</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepf0-end-to-end-fundamental-frequency-estimation-for-music-and-speech-signals-2102.06306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepf0-end-to-end-fundamental-frequency-estimation-for-music-and-speech-signals-2102.06306"/></url>
<url><loc>https://scifaro.com/en/abs/joint-dereverberation-and-separation-with-iterative-source-steering-2102.06322</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-dereverberation-and-separation-with-iterative-source-steering-2102.06322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-dereverberation-and-separation-with-iterative-source-steering-2102.06322"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-with-signal-companding-for-detection-of-logical-access-attacks-2102.06332</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-with-signal-companding-for-detection-of-logical-access-attacks-2102.06332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-with-signal-companding-for-detection-of-logical-access-attacks-2102.06332"/></url>
<url><loc>https://scifaro.com/en/abs/guided-variational-autoencoder-for-speech-enhancement-with-a-supervised-classifier-2102.06454</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-variational-autoencoder-for-speech-enhancement-with-a-supervised-classifier-2102.06454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-variational-autoencoder-for-speech-enhancement-with-a-supervised-classifier-2102.06454"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-into-the-codec-noise-robust-speech-coding-with-vector-quantized-autoencoders-2102.06610</loc><lastmod>2021-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-into-the-codec-noise-robust-speech-coding-with-vector-quantized-autoencoders-2102.06610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-into-the-codec-noise-robust-speech-coding-with-vector-quantized-autoencoders-2102.06610"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-phonetic-neural-model-for-correction-in-speech-recognition-systems-2102.06744</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-phonetic-neural-model-for-correction-in-speech-recognition-systems-2102.06744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-phonetic-neural-model-for-correction-in-speech-recognition-systems-2102.06744"/></url>
<url><loc>https://scifaro.com/en/abs/bi-apc-bidirectional-autoregressive-predictive-coding-for-unsupervised-pre-training-and-its-application-to-children-s-asr-2102.06816</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bi-apc-bidirectional-autoregressive-predictive-coding-for-unsupervised-pre-training-and-its-application-to-children-s-asr-2102.06816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bi-apc-bidirectional-autoregressive-predictive-coding-for-unsupervised-pre-training-and-its-application-to-children-s-asr-2102.06816"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-defense-for-automatic-speaker-verification-by-cascaded-self-supervised-learning-models-2102.07047</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-defense-for-automatic-speaker-verification-by-cascaded-self-supervised-learning-models-2102.07047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-defense-for-automatic-speaker-verification-by-cascaded-self-supervised-learning-models-2102.07047"/></url>
<url><loc>https://scifaro.com/en/abs/inverted-vocal-tract-variables-and-facial-action-units-to-quantify-neuromotor-coordination-in-schizophrenia-2102.07054</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inverted-vocal-tract-variables-and-facial-action-units-to-quantify-neuromotor-coordination-in-schizophrenia-2102.07054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inverted-vocal-tract-variables-and-facial-action-units-to-quantify-neuromotor-coordination-in-schizophrenia-2102.07054"/></url>
<url><loc>https://scifaro.com/en/abs/a-modulation-domain-loss-for-neural-network-based-real-time-speech-enhancement-2102.07330</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-modulation-domain-loss-for-neural-network-based-real-time-speech-enhancement-2102.07330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-modulation-domain-loss-for-neural-network-based-real-time-speech-enhancement-2102.07330"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-for-speech-recognition-using-feedback-based-relevance-weighting-2102.07390</loc><lastmod>2021-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-for-speech-recognition-using-feedback-based-relevance-weighting-2102.07390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-for-speech-recognition-using-feedback-based-relevance-weighting-2102.07390"/></url>
<url><loc>https://scifaro.com/en/abs/on-training-targets-for-noise-robust-voice-activity-detection-2102.07445</loc><lastmod>2021-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-training-targets-for-noise-robust-voice-activity-detection-2102.07445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-training-targets-for-noise-robust-voice-activity-detection-2102.07445"/></url>
<url><loc>https://scifaro.com/en/abs/periodnet-a-non-autoregressive-waveform-generation-model-with-a-structure-separating-periodic-and-aperiodic-components-2102.07786</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/periodnet-a-non-autoregressive-waveform-generation-model-with-a-structure-separating-periodic-and-aperiodic-components-2102.07786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/periodnet-a-non-autoregressive-waveform-generation-model-with-a-structure-separating-periodic-and-aperiodic-components-2102.07786"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-multi-source-localization-with-source-splitting-and-its-effectiveness-in-multi-talker-speech-recognition-2102.07955</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-multi-source-localization-with-source-splitting-and-its-effectiveness-in-multi-talker-speech-recognition-2102.07955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-multi-source-localization-with-source-splitting-and-its-effectiveness-in-multi-talker-speech-recognition-2102.07955"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-singing-voice-separation-with-noisy-self-training-2102.07961</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-singing-voice-separation-with-noisy-self-training-2102.07961"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-singing-voice-separation-with-noisy-self-training-2102.07961"/></url>
<url><loc>https://scifaro.com/en/abs/axial-residual-networks-for-cyclegan-based-voice-conversion-2102.08075</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/axial-residual-networks-for-cyclegan-based-voice-conversion-2102.08075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/axial-residual-networks-for-cyclegan-based-voice-conversion-2102.08075"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-prosody-correction-for-text-based-speech-editing-2102.08328</loc><lastmod>2021-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-prosody-correction-for-text-based-speech-editing-2102.08328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-prosody-correction-for-text-based-speech-editing-2102.08328"/></url>
<url><loc>https://scifaro.com/en/abs/variational-autoencoder-for-speech-enhancement-with-a-noise-aware-encoder-2102.08706</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-autoencoder-for-speech-enhancement-with-a-noise-aware-encoder-2102.08706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-autoencoder-for-speech-enhancement-with-a-noise-aware-encoder-2102.08706"/></url>
<url><loc>https://scifaro.com/en/abs/fundamental-frequency-feature-normalization-and-data-augmentation-for-child-speech-recognition-2102.09106</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fundamental-frequency-feature-normalization-and-data-augmentation-for-child-speech-recognition-2102.09106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fundamental-frequency-feature-normalization-and-data-augmentation-for-child-speech-recognition-2102.09106"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-kernelized-self-attention-for-long-sequence-data-and-its-application-to-ctc-based-speech-recognition-2102.09168</loc><lastmod>2021-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-kernelized-self-attention-for-long-sequence-data-and-its-application-to-ctc-based-speech-recognition-2102.09168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-kernelized-self-attention-for-long-sequence-data-and-its-application-to-ctc-based-speech-recognition-2102.09168"/></url>
<url><loc>https://scifaro.com/en/abs/generative-speech-coding-with-predictive-variance-regularization-2102.09660</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-speech-coding-with-predictive-variance-regularization-2102.09660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-speech-coding-with-predictive-variance-regularization-2102.09660"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-curriculum-learning-via-data-parameters-for-noise-robust-keyword-spotting-2102.09666</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-curriculum-learning-via-data-parameters-for-noise-robust-keyword-spotting-2102.09666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-curriculum-learning-via-data-parameters-for-noise-robust-keyword-spotting-2102.09666"/></url>
<url><loc>https://scifaro.com/en/abs/a-robust-maximum-likelihood-distortionless-response-beamformer-based-on-a-complex-generalized-gaussian-distribution-2102.09838</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-robust-maximum-likelihood-distortionless-response-beamformer-based-on-a-complex-generalized-gaussian-distribution-2102.09838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-robust-maximum-likelihood-distortionless-response-beamformer-based-on-a-complex-generalized-gaussian-distribution-2102.09838"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-estimation-of-noisy-speech-using-convolutional-recurrent-neural-networks-with-higher-order-ambisonics-signals-2102.09853</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-noisy-speech-using-convolutional-recurrent-neural-networks-with-higher-order-ambisonics-signals-2102.09853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-noisy-speech-using-convolutional-recurrent-neural-networks-with-higher-order-ambisonics-signals-2102.09853"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-neural-systems-for-automatic-children-speech-recognition-an-empirical-study-2102.09918</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-neural-systems-for-automatic-children-speech-recognition-an-empirical-study-2102.09918"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-neural-systems-for-automatic-children-speech-recognition-an-empirical-study-2102.09918"/></url>
<url><loc>https://scifaro.com/en/abs/do-end-to-end-speech-recognition-models-care-about-context-2102.09928</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-end-to-end-speech-recognition-models-care-about-context-2102.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-end-to-end-speech-recognition-models-care-about-context-2102.09928"/></url>
<url><loc>https://scifaro.com/en/abs/absp-system-for-the-third-dihard-challenge-2102.09939</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/absp-system-for-the-third-dihard-challenge-2102.09939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/absp-system-for-the-third-dihard-challenge-2102.09939"/></url>
<url><loc>https://scifaro.com/en/abs/artificially-synthesising-data-for-audio-classification-and-segmentation-to-improve-speech-and-music-detection-in-radio-broadcast-2102.09959</loc><lastmod>2021-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificially-synthesising-data-for-audio-classification-and-segmentation-to-improve-speech-and-music-detection-in-radio-broadcast-2102.09959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificially-synthesising-data-for-audio-classification-and-segmentation-to-improve-speech-and-music-detection-in-radio-broadcast-2102.09959"/></url>
<url><loc>https://scifaro.com/en/abs/model-architectures-to-extrapolate-emotional-expressions-in-dnn-based-text-to-speech-2102.10345</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-architectures-to-extrapolate-emotional-expressions-in-dnn-based-text-to-speech-2102.10345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-architectures-to-extrapolate-emotional-expressions-in-dnn-based-text-to-speech-2102.10345"/></url>
<url><loc>https://scifaro.com/en/abs/the-use-of-voice-source-features-for-sung-speech-recognition-2102.10376</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-use-of-voice-source-features-for-sung-speech-recognition-2102.10376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-use-of-voice-source-features-for-sung-speech-recognition-2102.10376"/></url>
<url><loc>https://scifaro.com/en/abs/warp-q-quality-prediction-for-generative-neural-speech-codecs-2102.10449</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/warp-q-quality-prediction-for-generative-neural-speech-codecs-2102.10449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/warp-q-quality-prediction-for-generative-neural-speech-codecs-2102.10449"/></url>
<url><loc>https://scifaro.com/en/abs/lvcnet-efficient-condition-dependent-modeling-network-for-waveform-generation-2102.10815</loc><lastmod>2021-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lvcnet-efficient-condition-dependent-modeling-network-for-waveform-generation-2102.10815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lvcnet-efficient-condition-dependent-modeling-network-for-waveform-generation-2102.10815"/></url>
<url><loc>https://scifaro.com/en/abs/automated-evaluation-of-psychotherapy-skills-using-speech-and-language-technologies-2102.11265</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-evaluation-of-psychotherapy-skills-using-speech-and-language-technologies-2102.11265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-evaluation-of-psychotherapy-skills-using-speech-and-language-technologies-2102.11265"/></url>
<url><loc>https://scifaro.com/en/abs/evolutionary-optimization-of-contexts-for-phonetic-correction-in-speech-recognition-systems-2102.11480</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolutionary-optimization-of-contexts-for-phonetic-correction-in-speech-recognition-systems-2102.11480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolutionary-optimization-of-contexts-for-phonetic-correction-in-speech-recognition-systems-2102.11480"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-dereverberation-beamforming-and-speech-recognition-with-improved-numerical-stability-and-advanced-frontend-2102.11525</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-dereverberation-beamforming-and-speech-recognition-with-improved-numerical-stability-and-advanced-frontend-2102.11525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-dereverberation-beamforming-and-speech-recognition-with-improved-numerical-stability-and-advanced-frontend-2102.11525"/></url>
<url><loc>https://scifaro.com/en/abs/unidirectional-memory-self-attention-transducer-for-online-speech-recognition-2102.11594</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unidirectional-memory-self-attention-transducer-for-online-speech-recognition-2102.11594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unidirectional-memory-self-attention-transducer-for-online-speech-recognition-2102.11594"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-modeling-for-long-recording-speech-separation-in-meetings-2102.11634</loc><lastmod>2021-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-modeling-for-long-recording-speech-separation-in-meetings-2102.11634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-modeling-for-long-recording-speech-separation-in-meetings-2102.11634"/></url>
<url><loc>https://scifaro.com/en/abs/handling-background-noise-in-neural-speech-generation-2102.11906</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/handling-background-noise-in-neural-speech-generation-2102.11906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/handling-background-noise-in-neural-speech-generation-2102.11906"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-multi-stage-self-attentive-temporal-convolutional-networks-2102.12078</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-multi-stage-self-attentive-temporal-convolutional-networks-2102.12078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-multi-stage-self-attentive-temporal-convolutional-networks-2102.12078"/></url>
<url><loc>https://scifaro.com/en/abs/sep-28k-a-dataset-for-stuttering-event-detection-from-podcasts-with-people-who-stutter-2102.12394</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sep-28k-a-dataset-for-stuttering-event-detection-from-podcasts-with-people-who-stutter-2102.12394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sep-28k-a-dataset-for-stuttering-event-detection-from-podcasts-with-people-who-stutter-2102.12394"/></url>
<url><loc>https://scifaro.com/en/abs/thoughts-on-the-potential-to-compensate-a-hearing-loss-in-noise-2102.12397</loc><lastmod>2021-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thoughts-on-the-potential-to-compensate-a-hearing-loss-in-noise-2102.12397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thoughts-on-the-potential-to-compensate-a-hearing-loss-in-noise-2102.12397"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-for-improving-rare-word-recognition-in-end-to-end-asr-2102.12624</loc><lastmod>2021-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-for-improving-rare-word-recognition-in-end-to-end-asr-2102.12624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-for-improving-rare-word-recognition-in-end-to-end-asr-2102.12624"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-classification-of-osa-related-snoring-signals-from-nocturnal-audio-recordings-2102.12829</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-classification-of-osa-related-snoring-signals-from-nocturnal-audio-recordings-2102.12829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-classification-of-osa-related-snoring-signals-from-nocturnal-audio-recordings-2102.12829"/></url>
<url><loc>https://scifaro.com/en/abs/integration-of-deep-learning-with-expectation-maximization-for-spatial-cue-based-speech-separation-in-reverberant-conditions-2102.13334</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integration-of-deep-learning-with-expectation-maximization-for-spatial-cue-based-speech-separation-in-reverberant-conditions-2102.13334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integration-of-deep-learning-with-expectation-maximization-for-spatial-cue-based-speech-separation-in-reverberant-conditions-2102.13334"/></url>
<url><loc>https://scifaro.com/en/abs/underwater-acoustic-communication-receiver-using-deep-belief-network-2102.13397</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/underwater-acoustic-communication-receiver-using-deep-belief-network-2102.13397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/underwater-acoustic-communication-receiver-using-deep-belief-network-2102.13397"/></url>
<url><loc>https://scifaro.com/en/abs/the-interspeech-2021-computational-paralinguistics-challenge-covid-19-cough-covid-19-speech-escalation-primates-2102.13468</loc><lastmod>2021-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-interspeech-2021-computational-paralinguistics-challenge-covid-19-cough-covid-19-speech-escalation-primates-2102.13468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-interspeech-2021-computational-paralinguistics-challenge-covid-19-cough-covid-19-speech-escalation-primates-2102.13468"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-ultrasound-tongue-imaging-for-the-automatic-detection-of-speech-articulation-errors-2103.00324</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-ultrasound-tongue-imaging-for-the-automatic-detection-of-speech-articulation-errors-2103.00324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-ultrasound-tongue-imaging-for-the-automatic-detection-of-speech-articulation-errors-2103.00324"/></url>
<url><loc>https://scifaro.com/en/abs/silent-versus-modal-multi-speaker-speech-recognition-from-ultrasound-and-video-2103.00333</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/silent-versus-modal-multi-speaker-speech-recognition-from-ultrasound-and-video-2103.00333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/silent-versus-modal-multi-speaker-speech-recognition-from-ultrasound-and-video-2103.00333"/></url>
<url><loc>https://scifaro.com/en/abs/alignment-knowledge-distillation-for-online-streaming-attention-based-speech-recognition-2103.00422</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignment-knowledge-distillation-for-online-streaming-attention-based-speech-recognition-2103.00422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignment-knowledge-distillation-for-online-streaming-attention-based-speech-recognition-2103.00422"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-separative-coding-for-self-supervised-representation-learning-2103.00816</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-separative-coding-for-self-supervised-representation-learning-2103.00816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-separative-coding-for-self-supervised-representation-learning-2103.00816"/></url>
<url><loc>https://scifaro.com/en/abs/sandglasset-a-light-multi-granularity-self-attentive-network-for-time-domain-speech-separation-2103.00819</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sandglasset-a-light-multi-granularity-self-attentive-network-for-time-domain-speech-separation-2103.00819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sandglasset-a-light-multi-granularity-self-attentive-network-for-time-domain-speech-separation-2103.00819"/></url>
<url><loc>https://scifaro.com/en/abs/adaspeech-adaptive-text-to-speech-for-custom-voice-2103.00993</loc><lastmod>2021-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaspeech-adaptive-text-to-speech-for-custom-voice-2103.00993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaspeech-adaptive-text-to-speech-for-custom-voice-2103.00993"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-acoustic-analyses-of-speech-data-collected-remotely-2103.01059</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-acoustic-analyses-of-speech-data-collected-remotely-2103.01059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-acoustic-analyses-of-speech-data-collected-remotely-2103.01059"/></url>
<url><loc>https://scifaro.com/en/abs/tune-in-training-under-negative-environments-with-interference-for-attention-networks-simulating-cocktail-party-effect-2103.01461</loc><lastmod>2021-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tune-in-training-under-negative-environments-with-interference-for-attention-networks-simulating-cocktail-party-effect-2103.01461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tune-in-training-under-negative-environments-with-interference-for-attention-networks-simulating-cocktail-party-effect-2103.01461"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-vad-into-asr-system-by-multi-task-learning-2103.01661</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-vad-into-asr-system-by-multi-task-learning-2103.01661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-vad-into-asr-system-by-multi-task-learning-2103.01661"/></url>
<url><loc>https://scifaro.com/en/abs/reverb-conversion-of-mixed-vocal-tracks-using-an-end-to-end-convolutional-deep-neural-network-2103.02147</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverb-conversion-of-mixed-vocal-tracks-using-an-end-to-end-convolutional-deep-neural-network-2103.02147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverb-conversion-of-mixed-vocal-tracks-using-an-end-to-end-convolutional-deep-neural-network-2103.02147"/></url>
<url><loc>https://scifaro.com/en/abs/open-community-platform-for-hearing-aid-algorithm-research-open-master-hearing-aid-openmha-2103.02313</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-community-platform-for-hearing-aid-algorithm-research-open-master-hearing-aid-openmha-2103.02313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-community-platform-for-hearing-aid-algorithm-research-open-master-hearing-aid-openmha-2103.02313"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-speech-and-noise-levels-on-the-quality-perceived-by-cochlear-implant-and-normal-hearing-listeners-2103.02421</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-speech-and-noise-levels-on-the-quality-perceived-by-cochlear-implant-and-normal-hearing-listeners-2103.02421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-speech-and-noise-levels-on-the-quality-perceived-by-cochlear-implant-and-normal-hearing-listeners-2103.02421"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-and-multi-microphone-acoustic-echo-cancellation-using-a-deep-learning-based-approach-2103.02552</loc><lastmod>2021-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-and-multi-microphone-acoustic-echo-cancellation-using-a-deep-learning-based-approach-2103.02552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-and-multi-microphone-acoustic-echo-cancellation-using-a-deep-learning-based-approach-2103.02552"/></url>
<url><loc>https://scifaro.com/en/abs/the-spatial-selective-auditory-attention-of-cochlear-implant-users-in-different-conversational-sound-levels-2103.02703</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-spatial-selective-auditory-attention-of-cochlear-implant-users-in-different-conversational-sound-levels-2103.02703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-spatial-selective-auditory-attention-of-cochlear-implant-users-in-different-conversational-sound-levels-2103.02703"/></url>
<url><loc>https://scifaro.com/en/abs/crank-an-open-source-software-for-nonparallel-voice-conversion-based-on-vector-quantized-variational-autoencoder-2103.02858</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crank-an-open-source-software-for-nonparallel-voice-conversion-based-on-vector-quantized-variational-autoencoder-2103.02858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crank-an-open-source-software-for-nonparallel-voice-conversion-based-on-vector-quantized-variational-autoencoder-2103.02858"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-acoustic-modelling-for-phone-recognition-of-young-readers-2103.02899</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-acoustic-modelling-for-phone-recognition-of-young-readers-2103.02899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-acoustic-modelling-for-phone-recognition-of-young-readers-2103.02899"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-mispronunciation-detection-and-diagnosis-from-raw-waveforms-2103.03023</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-mispronunciation-detection-and-diagnosis-from-raw-waveforms-2103.03023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-mispronunciation-detection-and-diagnosis-from-raw-waveforms-2103.03023"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-text-to-speech-model-utilizing-broadcast-data-mixed-with-background-music-2103.03049</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-text-to-speech-model-utilizing-broadcast-data-mixed-with-background-music-2103.03049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-text-to-speech-model-utilizing-broadcast-data-mixed-with-background-music-2103.03049"/></url>
<url><loc>https://scifaro.com/en/abs/front-end-diarization-for-percussion-separation-in-taniavartanam-of-carnatic-music-concerts-2103.03215</loc><lastmod>2021-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/front-end-diarization-for-percussion-separation-in-taniavartanam-of-carnatic-music-concerts-2103.03215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/front-end-diarization-for-percussion-separation-in-taniavartanam-of-carnatic-music-concerts-2103.03215"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-cnn-bilstm-voice-activity-detector-2103.03529</loc><lastmod>2021-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-cnn-bilstm-voice-activity-detector-2103.03529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-cnn-bilstm-voice-activity-detector-2103.03529"/></url>
<url><loc>https://scifaro.com/en/abs/odas-open-embedded-audition-system-2103.03954</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/odas-open-embedded-audition-system-2103.03954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/odas-open-embedded-audition-system-2103.03954"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-wireless-communication-parameters-into-the-e-model-algorithm-2103.03970</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-wireless-communication-parameters-into-the-e-model-algorithm-2103.03970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-wireless-communication-parameters-into-the-e-model-algorithm-2103.03970"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-on-incorporating-pretrained-and-learnable-speaker-representations-for-multi-speaker-multi-style-text-to-speech-2103.04088</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-on-incorporating-pretrained-and-learnable-speaker-representations-for-multi-speaker-multi-style-text-to-speech-2103.04088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-on-incorporating-pretrained-and-learnable-speaker-representations-for-multi-speaker-multi-style-text-to-speech-2103.04088"/></url>
<url><loc>https://scifaro.com/en/abs/htmd-net-a-hybrid-masking-denoising-approach-to-time-domain-monaural-singing-voice-separation-2103.04336</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/htmd-net-a-hybrid-masking-denoising-approach-to-time-domain-monaural-singing-voice-separation-2103.04336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/htmd-net-a-hybrid-masking-denoising-approach-to-time-domain-monaural-singing-voice-separation-2103.04336"/></url>
<url><loc>https://scifaro.com/en/abs/an-optimized-signal-processing-pipeline-for-syllable-detection-and-speech-rate-estimation-2103.04346</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-optimized-signal-processing-pipeline-for-syllable-detection-and-speech-rate-estimation-2103.04346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-optimized-signal-processing-pipeline-for-syllable-detection-and-speech-rate-estimation-2103.04346"/></url>
<url><loc>https://scifaro.com/en/abs/cuhk-ee-voice-cloning-system-for-icassp-2021-m2voc-challenge-2103.04699</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuhk-ee-voice-cloning-system-for-icassp-2021-m2voc-challenge-2103.04699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuhk-ee-voice-cloning-system-for-icassp-2021-m2voc-challenge-2103.04699"/></url>
<url><loc>https://scifaro.com/en/abs/an-ultra-low-power-rnn-classifier-for-always-on-voice-wake-up-detection-robust-to-real-world-scenarios-2103.04792</loc><lastmod>2021-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ultra-low-power-rnn-classifier-for-always-on-voice-wake-up-detection-robust-to-real-world-scenarios-2103.04792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ultra-low-power-rnn-classifier-for-always-on-voice-wake-up-detection-robust-to-real-world-scenarios-2103.04792"/></url>
<url><loc>https://scifaro.com/en/abs/a-parallelizable-lattice-rescoring-strategy-with-neural-language-models-2103.05081</loc><lastmod>2021-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-parallelizable-lattice-rescoring-strategy-with-neural-language-models-2103.05081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-parallelizable-lattice-rescoring-strategy-with-neural-language-models-2103.05081"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-based-spoken-term-detection-and-localization-without-dynamic-programming-2103.05468</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-based-spoken-term-detection-and-localization-without-dynamic-programming-2103.05468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-based-spoken-term-detection-and-localization-without-dynamic-programming-2103.05468"/></url>
<url><loc>https://scifaro.com/en/abs/best-of-both-worlds-robust-accented-speech-recognition-with-adversarial-transfer-learning-2103.05834</loc><lastmod>2021-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/best-of-both-worlds-robust-accented-speech-recognition-with-adversarial-transfer-learning-2103.05834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/best-of-both-worlds-robust-accented-speech-recognition-with-adversarial-transfer-learning-2103.05834"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multiway-canonical-correlation-analysis-for-multi-subject-eeg-normalization-2103.06478</loc><lastmod>2021-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multiway-canonical-correlation-analysis-for-multi-subject-eeg-normalization-2103.06478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multiway-canonical-correlation-analysis-for-multi-subject-eeg-normalization-2103.06478"/></url>
<url><loc>https://scifaro.com/en/abs/forward-backward-convolutional-recurrent-neural-networks-and-tag-conditioned-convolutional-neural-networks-for-weakly-labeled-semi-supervised-sound-event-detection-2103.06581</loc><lastmod>2021-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/forward-backward-convolutional-recurrent-neural-networks-and-tag-conditioned-convolutional-neural-networks-for-weakly-labeled-semi-supervised-sound-event-detection-2103.06581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/forward-backward-convolutional-recurrent-neural-networks-and-tag-conditioned-convolutional-neural-networks-for-weakly-labeled-semi-supervised-sound-event-detection-2103.06581"/></url>
<url><loc>https://scifaro.com/en/abs/byol-for-audio-self-supervised-learning-for-general-purpose-audio-representation-2103.06695</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/byol-for-audio-self-supervised-learning-for-general-purpose-audio-representation-2103.06695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/byol-for-audio-self-supervised-learning-for-general-purpose-audio-representation-2103.06695"/></url>
<url><loc>https://scifaro.com/en/abs/learning-word-level-confidence-for-subword-end-to-end-asr-2103.06716</loc><lastmod>2021-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-word-level-confidence-for-subword-end-to-end-asr-2103.06716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-word-level-confidence-for-subword-end-to-end-asr-2103.06716"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-acoustic-unit-augmentation-with-bpe-dropout-for-low-resource-end-to-end-speech-recognition-2103.07186</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-acoustic-unit-augmentation-with-bpe-dropout-for-low-resource-end-to-end-speech-recognition-2103.07186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-acoustic-unit-augmentation-with-bpe-dropout-for-low-resource-end-to-end-speech-recognition-2103.07186"/></url>
<url><loc>https://scifaro.com/en/abs/signal-representations-for-synthesizing-audio-textures-with-generative-adversarial-networks-2103.07390</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-representations-for-synthesizing-audio-textures-with-generative-adversarial-networks-2103.07390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-representations-for-synthesizing-audio-textures-with-generative-adversarial-networks-2103.07390"/></url>
<url><loc>https://scifaro.com/en/abs/xlst-cross-lingual-self-training-to-learn-multilingual-representation-for-low-resource-speech-recognition-2103.08207</loc><lastmod>2021-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xlst-cross-lingual-self-training-to-learn-multilingual-representation-for-low-resource-speech-recognition-2103.08207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xlst-cross-lingual-self-training-to-learn-multilingual-representation-for-low-resource-speech-recognition-2103.08207"/></url>
<url><loc>https://scifaro.com/en/abs/wav2vec-c-a-self-supervised-model-for-speech-representation-learning-2103.08393</loc><lastmod>2021-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2vec-c-a-self-supervised-model-for-speech-representation-learning-2103.08393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2vec-c-a-self-supervised-model-for-speech-representation-learning-2103.08393"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-and-interpretable-neural-modeling-of-an-audio-distortion-effect-using-hyperconditioned-differentiable-biquads-2103.08709</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-and-interpretable-neural-modeling-of-an-audio-distortion-effect-using-hyperconditioned-differentiable-biquads-2103.08709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-and-interpretable-neural-modeling-of-an-audio-distortion-effect-using-hyperconditioned-differentiable-biquads-2103.08709"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-speaker-verification-with-target-speaker-enhancement-2103.08781</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-speaker-verification-with-target-speaker-enhancement-2103.08781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-speaker-verification-with-target-speaker-enhancement-2103.08781"/></url>
<url><loc>https://scifaro.com/en/abs/flow-based-self-supervised-density-estimation-for-anomalous-sound-detection-2103.08801</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flow-based-self-supervised-density-estimation-for-anomalous-sound-detection-2103.08801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flow-based-self-supervised-density-estimation-for-anomalous-sound-detection-2103.08801"/></url>
<url><loc>https://scifaro.com/en/abs/aec-in-a-netshell-on-target-and-topology-choices-for-fcrn-acoustic-echo-cancellation-2103.09007</loc><lastmod>2021-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aec-in-a-netshell-on-target-and-topology-choices-for-fcrn-acoustic-echo-cancellation-2103.09007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aec-in-a-netshell-on-target-and-topology-choices-for-fcrn-acoustic-echo-cancellation-2103.09007"/></url>
<url><loc>https://scifaro.com/en/abs/dicova-challenge-dataset-task-and-baseline-system-for-covid-19-diagnosis-using-acoustics-2103.09148</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dicova-challenge-dataset-task-and-baseline-system-for-covid-19-diagnosis-using-acoustics-2103.09148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dicova-challenge-dataset-task-and-baseline-system-for-covid-19-diagnosis-using-acoustics-2103.09148"/></url>
<url><loc>https://scifaro.com/en/abs/improving-zero-shot-voice-style-transfer-via-disentangled-representation-learning-2103.09420</loc><lastmod>2021-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-zero-shot-voice-style-transfer-via-disentangled-representation-learning-2103.09420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-zero-shot-voice-style-transfer-via-disentangled-representation-learning-2103.09420"/></url>
<url><loc>https://scifaro.com/en/abs/styler-style-factor-modeling-with-rapidity-and-robustness-via-speech-decomposition-for-expressive-and-controllable-neural-text-to-speech-2103.09474</loc><lastmod>2021-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styler-style-factor-modeling-with-rapidity-and-robustness-via-speech-decomposition-for-expressive-and-controllable-neural-text-to-speech-2103.09474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styler-style-factor-modeling-with-rapidity-and-robustness-via-speech-decomposition-for-expressive-and-controllable-neural-text-to-speech-2103.09474"/></url>
<url><loc>https://scifaro.com/en/abs/tstnn-two-stage-transformer-based-neural-network-for-speech-enhancement-in-the-time-domain-2103.09963</loc><lastmod>2021-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tstnn-two-stage-transformer-based-neural-network-for-speech-enhancement-in-the-time-domain-2103.09963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tstnn-two-stage-transformer-based-neural-network-for-speech-enhancement-in-the-time-domain-2103.09963"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-recent-neural-spoofing-countermeasures-for-synthetic-speech-detection-2103.11326</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-recent-neural-spoofing-countermeasures-for-synthetic-speech-detection-2103.11326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-recent-neural-spoofing-countermeasures-for-synthetic-speech-detection-2103.11326"/></url>
<url><loc>https://scifaro.com/en/abs/qucoughscope-an-artificially-intelligent-mobile-application-to-detect-asymptomatic-covid-19-patients-using-cough-and-breathing-sounds-2103.12063</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qucoughscope-an-artificially-intelligent-mobile-application-to-detect-asymptomatic-covid-19-patients-using-cough-and-breathing-sounds-2103.12063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qucoughscope-an-artificially-intelligent-mobile-application-to-detect-asymptomatic-covid-19-patients-using-cough-and-breathing-sounds-2103.12063"/></url>
<url><loc>https://scifaro.com/en/abs/joint-framework-with-deep-feature-distillation-and-adaptive-focal-loss-for-weakly-supervised-audio-tagging-and-acoustic-event-detection-2103.12388</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-framework-with-deep-feature-distillation-and-adaptive-focal-loss-for-weakly-supervised-audio-tagging-and-acoustic-event-detection-2103.12388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-framework-with-deep-feature-distillation-and-adaptive-focal-loss-for-weakly-supervised-audio-tagging-and-acoustic-event-detection-2103.12388"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-of-pre-trained-end-to-end-speech-recognition-with-generative-adversarial-networks-2103.13329</loc><lastmod>2021-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-of-pre-trained-end-to-end-speech-recognition-with-generative-adversarial-networks-2103.13329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-of-pre-trained-end-to-end-speech-recognition-with-generative-adversarial-networks-2103.13329"/></url>
<url><loc>https://scifaro.com/en/abs/efficienttdnn-efficient-architecture-search-for-speaker-recognition-2103.13581</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficienttdnn-efficient-architecture-search-for-speaker-recognition-2103.13581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficienttdnn-efficient-architecture-search-for-speaker-recognition-2103.13581"/></url>
<url><loc>https://scifaro.com/en/abs/radically-old-way-of-computing-spectra-applications-in-end-to-end-asr-2103.14129</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radically-old-way-of-computing-spectra-applications-in-end-to-end-asr-2103.14129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radically-old-way-of-computing-spectra-applications-in-end-to-end-asr-2103.14129"/></url>
<url><loc>https://scifaro.com/en/abs/residual-energy-based-models-for-end-to-end-speech-recognition-2103.14152</loc><lastmod>2021-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-energy-based-models-for-end-to-end-speech-recognition-2103.14152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-energy-based-models-for-end-to-end-speech-recognition-2103.14152"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-chorus-detection-for-popular-music-using-convolutional-neural-network-and-multi-task-learning-2103.14253</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-chorus-detection-for-popular-music-using-convolutional-neural-network-and-multi-task-learning-2103.14253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-chorus-detection-for-popular-music-using-convolutional-neural-network-and-multi-task-learning-2103.14253"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-based-discriminative-training-for-domain-compensation-in-acoustic-event-detection-with-frame-wise-classifier-2103.14297</loc><lastmod>2021-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-based-discriminative-training-for-domain-compensation-in-acoustic-event-detection-with-frame-wise-classifier-2103.14297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-based-discriminative-training-for-domain-compensation-in-acoustic-event-detection-with-frame-wise-classifier-2103.14297"/></url>
<url><loc>https://scifaro.com/en/abs/data-quality-as-predictor-of-voice-anti-spoofing-generalization-2103.14602</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-quality-as-predictor-of-voice-anti-spoofing-generalization-2103.14602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-quality-as-predictor-of-voice-anti-spoofing-generalization-2103.14602"/></url>
<url><loc>https://scifaro.com/en/abs/scalable-and-efficient-neural-speech-coding-a-hybrid-design-2103.14776</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scalable-and-efficient-neural-speech-coding-a-hybrid-design-2103.14776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scalable-and-efficient-neural-speech-coding-a-hybrid-design-2103.14776"/></url>
<url><loc>https://scifaro.com/en/abs/libri-adhoc40-a-dataset-collected-from-synchronized-ad-hoc-microphone-arrays-2103.15118</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libri-adhoc40-a-dataset-collected-from-synchronized-ad-hoc-microphone-arrays-2103.15118"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libri-adhoc40-a-dataset-collected-from-synchronized-ad-hoc-microphone-arrays-2103.15118"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-bias-in-automatic-speech-recognition-2103.15122</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-bias-in-automatic-speech-recognition-2103.15122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-bias-in-automatic-speech-recognition-2103.15122"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-sparsemax-based-channel-selection-for-speech-recognition-with-ad-hoc-microphone-arrays-2103.15305</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-sparsemax-based-channel-selection-for-speech-recognition-with-ad-hoc-microphone-arrays-2103.15305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-sparsemax-based-channel-selection-for-speech-recognition-with-ad-hoc-microphone-arrays-2103.15305"/></url>
<url><loc>https://scifaro.com/en/abs/improved-meta-learning-training-for-speaker-verification-2103.15421</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-meta-learning-training-for-speaker-verification-2103.15421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-meta-learning-training-for-speaker-verification-2103.15421"/></url>
<url><loc>https://scifaro.com/en/abs/mediaspeech-multilanguage-asr-benchmark-and-dataset-2103.16193</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mediaspeech-multilanguage-asr-benchmark-and-dataset-2103.16193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mediaspeech-multilanguage-asr-benchmark-and-dataset-2103.16193"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-verification-with-selective-auditory-attention-for-single-and-multi-talker-speech-2103.16269</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-verification-with-selective-auditory-attention-for-single-and-multi-talker-speech-2103.16269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-verification-with-selective-auditory-attention-for-single-and-multi-talker-speech-2103.16269"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-segment-based-speech-emotion-recognition-by-deep-self-learning-2103.16456</loc><lastmod>2021-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-segment-based-speech-emotion-recognition-by-deep-self-learning-2103.16456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-segment-based-speech-emotion-recognition-by-deep-self-learning-2103.16456"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-for-low-resource-speech-to-intent-applications-2103.16674</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-for-low-resource-speech-to-intent-applications-2103.16674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-for-low-resource-speech-to-intent-applications-2103.16674"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-pre-training-of-end-to-end-multi-talker-asr-for-meeting-transcription-with-single-distant-microphone-2103.16776</loc><lastmod>2021-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-pre-training-of-end-to-end-multi-talker-asr-for-meeting-transcription-with-single-distant-microphone-2103.16776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-pre-training-of-end-to-end-multi-talker-asr-for-meeting-transcription-with-single-distant-microphone-2103.16776"/></url>
<url><loc>https://scifaro.com/en/abs/integer-only-zero-shot-quantization-for-efficient-speech-recognition-2103.16827</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integer-only-zero-shot-quantization-for-efficient-speech-recognition-2103.16827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integer-only-zero-shot-quantization-for-efficient-speech-recognition-2103.16827"/></url>
<url><loc>https://scifaro.com/en/abs/tecanet-temporal-contextual-attention-network-for-environment-aware-speech-dereverberation-2103.16849</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tecanet-temporal-contextual-attention-network-for-environment-aware-speech-dereverberation-2103.16849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tecanet-temporal-contextual-attention-network-for-environment-aware-speech-dereverberation-2103.16849"/></url>
<url><loc>https://scifaro.com/en/abs/specaugment-a-hidden-space-data-augmentation-method-for-acoustic-scene-classification-2103.16858</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specaugment-a-hidden-space-data-augmentation-method-for-acoustic-scene-classification-2103.16858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specaugment-a-hidden-space-data-augmentation-method-for-acoustic-scene-classification-2103.16858"/></url>
<url><loc>https://scifaro.com/en/abs/deep-noise-suppression-with-non-intrusive-pesqnet-supervision-enabling-the-use-of-real-training-data-2103.17088</loc><lastmod>2021-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-noise-suppression-with-non-intrusive-pesqnet-supervision-enabling-the-use-of-real-training-data-2103.17088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-noise-suppression-with-non-intrusive-pesqnet-supervision-enabling-the-use-of-real-training-data-2103.17088"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-and-defenses-for-speech-recognition-systems-2103.17122</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-and-defenses-for-speech-recognition-systems-2103.17122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-and-defenses-for-speech-recognition-systems-2103.17122"/></url>
<url><loc>https://scifaro.com/en/abs/y-2-net-fcrn-for-acoustic-echo-and-noise-suppression-2103.17189</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/y-2-net-fcrn-for-acoustic-echo-and-noise-suppression-2103.17189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/y-2-net-fcrn-for-acoustic-echo-and-noise-suppression-2103.17189"/></url>
<url><loc>https://scifaro.com/en/abs/multi-encoder-learning-and-stream-fusion-for-transformer-based-end-to-end-automatic-speech-recognition-2104.00120</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-encoder-learning-and-stream-fusion-for-transformer-based-end-to-end-automatic-speech-recognition-2104.00120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-encoder-learning-and-stream-fusion-for-transformer-based-end-to-end-automatic-speech-recognition-2104.00120"/></url>
<url><loc>https://scifaro.com/en/abs/bidirectional-multiscale-feature-aggregation-for-speaker-verification-2104.00230</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bidirectional-multiscale-feature-aggregation-for-speaker-verification-2104.00230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bidirectional-multiscale-feature-aggregation-for-speaker-verification-2104.00230"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-spatial-speech-recognition-maps-based-on-simulated-speech-recognition-experiments-2104.00259</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-spatial-speech-recognition-maps-based-on-simulated-speech-recognition-experiments-2104.00259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-spatial-speech-recognition-maps-based-on-simulated-speech-recognition-experiments-2104.00259"/></url>
<url><loc>https://scifaro.com/en/abs/cycledrums-automatic-drum-arrangement-for-bass-lines-using-cyclegan-2104.00353</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycledrums-automatic-drum-arrangement-for-bass-lines-using-cyclegan-2104.00353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycledrums-automatic-drum-arrangement-for-bass-lines-using-cyclegan-2104.00353"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-text-to-speech-using-style-tag-2104.00436</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-text-to-speech-using-style-tag-2104.00436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-text-to-speech-using-style-tag-2104.00436"/></url>
<url><loc>https://scifaro.com/en/abs/fast-dctts-efficient-deep-convolutional-text-to-speech-2104.00624</loc><lastmod>2021-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-dctts-efficient-deep-convolutional-text-to-speech-2104.00624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-dctts-efficient-deep-convolutional-text-to-speech-2104.00624"/></url>
<url><loc>https://scifaro.com/en/abs/keyword-transformer-a-self-attention-model-for-keyword-spotting-2104.00769</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keyword-transformer-a-self-attention-model-for-keyword-spotting-2104.00769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keyword-transformer-a-self-attention-model-for-keyword-spotting-2104.00769"/></url>
<url><loc>https://scifaro.com/en/abs/assem-vc-realistic-voice-conversion-by-assembling-modern-speech-synthesis-techniques-2104.00931</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assem-vc-realistic-voice-conversion-by-assembling-modern-speech-synthesis-techniques-2104.00931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assem-vc-realistic-voice-conversion-by-assembling-modern-speech-synthesis-techniques-2104.00931"/></url>
<url><loc>https://scifaro.com/en/abs/interspeech-2021-conferencingspeech-challenge-towards-far-field-multi-channel-speech-enhancement-for-video-conferencing-2104.00960</loc><lastmod>2021-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interspeech-2021-conferencingspeech-challenge-towards-far-field-multi-channel-speech-enhancement-for-video-conferencing-2104.00960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interspeech-2021-conferencingspeech-challenge-towards-far-field-multi-channel-speech-enhancement-for-video-conferencing-2104.00960"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-acoustic-unit-discovery-by-leveraging-a-language-independent-subword-discriminative-feature-representation-2104.00994</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-acoustic-unit-discovery-by-leveraging-a-language-independent-subword-discriminative-feature-representation-2104.00994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-acoustic-unit-discovery-by-leveraging-a-language-independent-subword-discriminative-feature-representation-2104.00994"/></url>
<url><loc>https://scifaro.com/en/abs/metricnet-towards-improved-modeling-for-non-intrusive-speech-quality-assessment-2104.01227</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metricnet-towards-improved-modeling-for-non-intrusive-speech-quality-assessment-2104.01227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metricnet-towards-improved-modeling-for-non-intrusive-speech-quality-assessment-2104.01227"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-channel-effects-for-synthetic-voice-spoofing-countermeasure-systems-2104.01320</loc><lastmod>2026-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-channel-effects-for-synthetic-voice-spoofing-countermeasure-systems-2104.01320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-channel-effects-for-synthetic-voice-spoofing-countermeasure-systems-2104.01320"/></url>
<url><loc>https://scifaro.com/en/abs/exkaldi-rt-a-real-time-automatic-speech-recognition-extension-toolkit-of-kaldi-2104.01384</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exkaldi-rt-a-real-time-automatic-speech-recognition-extension-toolkit-of-kaldi-2104.01384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exkaldi-rt-a-real-time-automatic-speech-recognition-extension-toolkit-of-kaldi-2104.01384"/></url>
<url><loc>https://scifaro.com/en/abs/diff-tts-a-denoising-diffusion-model-for-text-to-speech-2104.01409</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-tts-a-denoising-diffusion-model-for-text-to-speech-2104.01409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-tts-a-denoising-diffusion-model-for-text-to-speech-2104.01409"/></url>
<url><loc>https://scifaro.com/en/abs/deep-feature-cyclegans-speaker-identity-preserving-non-parallel-microphone-telephone-domain-adaptation-for-speaker-verification-2104.01433</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-feature-cyclegans-speaker-identity-preserving-non-parallel-microphone-telephone-domain-adaptation-for-speaker-verification-2104.01433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-feature-cyclegans-speaker-identity-preserving-non-parallel-microphone-telephone-domain-adaptation-for-speaker-verification-2104.01433"/></url>
<url><loc>https://scifaro.com/en/abs/ecapa-tdnn-embeddings-for-speaker-diarization-2104.01466</loc><lastmod>2021-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecapa-tdnn-embeddings-for-speaker-diarization-2104.01466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecapa-tdnn-embeddings-for-speaker-diarization-2104.01466"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-joint-training-with-self-attention-mechanism-for-robust-end-to-end-speech-recognition-2104.01471</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-joint-training-with-self-attention-mechanism-for-robust-end-to-end-speech-recognition-2104.01471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-joint-training-with-self-attention-mechanism-for-robust-end-to-end-speech-recognition-2104.01471"/></url>
<url><loc>https://scifaro.com/en/abs/hi-fi-multi-speaker-english-tts-dataset-2104.01497</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hi-fi-multi-speaker-english-tts-dataset-2104.01497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hi-fi-multi-speaker-english-tts-dataset-2104.01497"/></url>
<url><loc>https://scifaro.com/en/abs/tsnat-two-step-non-autoregressvie-transformer-models-for-speech-recognition-2104.01522</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tsnat-two-step-non-autoregressvie-transformer-models-for-speech-recognition-2104.01522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tsnat-two-step-non-autoregressvie-transformer-models-for-speech-recognition-2104.01522"/></url>
<url><loc>https://scifaro.com/en/abs/attention-back-end-for-automatic-speaker-verification-with-multiple-enrollment-utterances-2104.01541</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-back-end-for-automatic-speaker-verification-with-multiple-enrollment-utterances-2104.01541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-back-end-for-automatic-speaker-verification-with-multiple-enrollment-utterances-2104.01541"/></url>
<url><loc>https://scifaro.com/en/abs/citrinet-closing-the-gap-between-non-autoregressive-and-autoregressive-end-to-end-models-for-automatic-speech-recognition-2104.01721</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/citrinet-closing-the-gap-between-non-autoregressive-and-autoregressive-end-to-end-models-for-automatic-speech-recognition-2104.01721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/citrinet-closing-the-gap-between-non-autoregressive-and-autoregressive-end-to-end-models-for-automatic-speech-recognition-2104.01721"/></url>
<url><loc>https://scifaro.com/en/abs/the-multi-speaker-multi-style-voice-cloning-challenge-2021-2104.01818</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-multi-speaker-multi-style-voice-cloning-challenge-2021-2104.01818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-multi-speaker-multi-style-voice-cloning-challenge-2021-2104.01818"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioned-acoustic-modeling-for-multi-speaker-conversational-asr-2104.01882</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioned-acoustic-modeling-for-multi-speaker-conversational-asr-2104.01882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioned-acoustic-modeling-for-multi-speaker-conversational-asr-2104.01882"/></url>
<url><loc>https://scifaro.com/en/abs/reformulating-dover-lap-label-mapping-as-a-graph-partitioning-problem-2104.01954</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reformulating-dover-lap-label-mapping-as-a-graph-partitioning-problem-2104.01954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reformulating-dover-lap-label-mapping-as-a-graph-partitioning-problem-2104.01954"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-personalized-speech-enhancement-through-self-supervised-learning-2104.02017</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-personalized-speech-enhancement-through-self-supervised-learning-2104.02017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-personalized-speech-enhancement-through-self-supervised-learning-2104.02017"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-speech-enhancement-through-self-supervised-data-augmentation-and-purification-2104.02018</loc><lastmod>2021-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-speech-enhancement-through-self-supervised-data-augmentation-and-purification-2104.02018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-speech-enhancement-through-self-supervised-data-augmentation-and-purification-2104.02018"/></url>
<url><loc>https://scifaro.com/en/abs/speakerstew-scaling-to-many-languages-with-a-triaged-multilingual-text-dependent-and-text-independent-speaker-verification-system-2104.02125</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speakerstew-scaling-to-many-languages-with-a-triaged-multilingual-text-dependent-and-text-independent-speaker-verification-system-2104.02125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speakerstew-scaling-to-many-languages-with-a-triaged-multilingual-text-dependent-and-text-independent-speaker-verification-system-2104.02125"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-attributed-asr-with-transformer-2104.02128</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-attributed-asr-with-transformer-2104.02128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-attributed-asr-with-transformer-2104.02128"/></url>
<url><loc>https://scifaro.com/en/abs/nu-wave-a-diffusion-probabilistic-model-for-neural-audio-upsampling-2104.02321</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nu-wave-a-diffusion-probabilistic-model-for-neural-audio-upsampling-2104.02321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nu-wave-a-diffusion-probabilistic-model-for-neural-audio-upsampling-2104.02321"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-english-speech-in-the-air-traffic-control-voice-communication-2104.02332</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-english-speech-in-the-air-traffic-control-voice-communication-2104.02332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-english-speech-in-the-air-traffic-control-voice-communication-2104.02332"/></url>
<url><loc>https://scifaro.com/en/abs/leap-submission-for-the-third-dihard-diarization-challenge-2104.02359</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leap-submission-for-the-third-dihard-diarization-challenge-2104.02359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leap-submission-for-the-third-dihard-diarization-challenge-2104.02359"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-frequency-translational-invariance-in-tdnns-and-frequency-positional-information-in-2d-resnets-to-enhance-speaker-verification-2104.02370</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-frequency-translational-invariance-in-tdnns-and-frequency-positional-information-in-2d-resnets-to-enhance-speaker-verification-2104.02370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-frequency-translational-invariance-in-tdnns-and-frequency-positional-information-in-2d-resnets-to-enhance-speaker-verification-2104.02370"/></url>
<url><loc>https://scifaro.com/en/abs/prosobeast-prosody-annotation-tool-2104.02397</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosobeast-prosody-annotation-tool-2104.02397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosobeast-prosody-annotation-tool-2104.02397"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-using-two-pass-leave-one-out-gaussian-plda-clustering-of-dnn-embeddings-2104.02469</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-using-two-pass-leave-one-out-gaussian-plda-clustering-of-dnn-embeddings-2104.02469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-using-two-pass-leave-one-out-gaussian-plda-clustering-of-dnn-embeddings-2104.02469"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-investigation-for-detecting-partially-spoofed-audio-2104.02518</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-investigation-for-detecting-partially-spoofed-audio-2104.02518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-investigation-for-detecting-partially-spoofed-audio-2104.02518"/></url>
<url><loc>https://scifaro.com/en/abs/lt-lm-a-novel-non-autoregressive-language-model-for-single-shot-lattice-rescoring-2104.02526</loc><lastmod>2021-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lt-lm-a-novel-non-autoregressive-language-model-for-single-shot-lattice-rescoring-2104.02526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lt-lm-a-novel-non-autoregressive-language-model-for-single-shot-lattice-rescoring-2104.02526"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embeddings-by-modeling-channel-wise-correlations-2104.02571</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embeddings-by-modeling-channel-wise-correlations-2104.02571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embeddings-by-modeling-channel-wise-correlations-2104.02571"/></url>
<url><loc>https://scifaro.com/en/abs/relaxing-the-conditional-independence-assumption-of-ctc-based-asr-by-conditioning-on-intermediate-predictions-2104.02724</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relaxing-the-conditional-independence-assumption-of-ctc-based-asr-by-conditioning-on-intermediate-predictions-2104.02724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relaxing-the-conditional-independence-assumption-of-ctc-based-asr-by-conditioning-on-intermediate-predictions-2104.02724"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-targeted-universal-adversarial-perturbations-to-end-to-end-asr-models-2104.02757</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-targeted-universal-adversarial-perturbations-to-end-to-end-asr-models-2104.02757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-targeted-universal-adversarial-perturbations-to-end-to-end-asr-models-2104.02757"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-rank-microphones-for-distant-speech-recognition-2104.02819</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-rank-microphones-for-distant-speech-recognition-2104.02819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-rank-microphones-for-distant-speech-recognition-2104.02819"/></url>
<url><loc>https://scifaro.com/en/abs/capturing-multi-resolution-context-by-dilated-self-attention-2104.02858</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/capturing-multi-resolution-context-by-dilated-self-attention-2104.02858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/capturing-multi-resolution-context-by-dilated-self-attention-2104.02858"/></url>
<url><loc>https://scifaro.com/en/abs/three-class-overlapped-speech-detection-using-a-convolutional-recurrent-neural-network-2104.02878</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/three-class-overlapped-speech-detection-using-a-convolutional-recurrent-neural-network-2104.02878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/three-class-overlapped-speech-detection-using-a-convolutional-recurrent-neural-network-2104.02878"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-speaker-embeddings-for-speaker-diarisation-2104.02879</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-speaker-embeddings-for-speaker-diarisation-2104.02879"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-speaker-embeddings-for-speaker-diarisation-2104.02879"/></url>
<url><loc>https://scifaro.com/en/abs/fsr-accelerating-the-inference-process-of-transducer-based-models-by-applying-fast-skip-regularization-2104.02882</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fsr-accelerating-the-inference-process-of-transducer-based-models-by-applying-fast-skip-regularization-2104.02882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fsr-accelerating-the-inference-process-of-transducer-based-models-by-applying-fast-skip-regularization-2104.02882"/></url>
<url><loc>https://scifaro.com/en/abs/s2vc-a-framework-for-any-to-any-voice-conversion-with-self-supervised-pretrained-representations-2104.02901</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s2vc-a-framework-for-any-to-any-voice-conversion-with-self-supervised-pretrained-representations-2104.02901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s2vc-a-framework-for-any-to-any-voice-conversion-with-self-supervised-pretrained-representations-2104.02901"/></url>
<url><loc>https://scifaro.com/en/abs/siamese-neural-network-with-joint-bayesian-model-structure-for-speaker-verification-2104.03004</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/siamese-neural-network-with-joint-bayesian-model-structure-for-speaker-verification-2104.03004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/siamese-neural-network-with-joint-bayesian-model-structure-for-speaker-verification-2104.03004"/></url>
<url><loc>https://scifaro.com/en/abs/the-as-nu-system-for-the-m2voc-challenge-2104.03009</loc><lastmod>2021-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-as-nu-system-for-the-m2voc-challenge-2104.03009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-as-nu-system-for-the-m2voc-challenge-2104.03009"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-self-supervised-representations-for-mos-prediction-2104.03017</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-self-supervised-representations-for-mos-prediction-2104.03017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-self-supervised-representations-for-mos-prediction-2104.03017"/></url>
<url><loc>https://scifaro.com/en/abs/audio-declipping-performance-enhancement-via-crossfading-2104.03074</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-declipping-performance-enhancement-via-crossfading-2104.03074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-declipping-performance-enhancement-via-crossfading-2104.03074"/></url>
<url><loc>https://scifaro.com/en/abs/pushing-the-limits-of-non-autoregressive-speech-recognition-2104.03416</loc><lastmod>2021-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pushing-the-limits-of-non-autoregressive-speech-recognition-2104.03416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pushing-the-limits-of-non-autoregressive-speech-recognition-2104.03416"/></url>
<url><loc>https://scifaro.com/en/abs/graph-attention-networks-for-anti-spoofing-2104.03654</loc><lastmod>2021-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-attention-networks-for-anti-spoofing-2104.03654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-attention-networks-for-anti-spoofing-2104.03654"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-based-distribution-regularization-for-speech-enhancement-2104.03759</loc><lastmod>2021-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-based-distribution-regularization-for-speech-enhancement-2104.03759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-based-distribution-regularization-for-speech-enhancement-2104.03759"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-representation-learning-for-behavior-modeling-using-triplet-enhanced-contextualized-networks-2104.03899</loc><lastmod>2021-04-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-representation-learning-for-behavior-modeling-using-triplet-enhanced-contextualized-networks-2104.03899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-representation-learning-for-behavior-modeling-using-triplet-enhanced-contextualized-networks-2104.03899"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-segmentation-for-overlap-aware-resegmentation-2104.04045</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-segmentation-for-overlap-aware-resegmentation-2104.04045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-segmentation-for-overlap-aware-resegmentation-2104.04045"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-depression-severity-level-classification-using-a-multi-stage-dilated-cnn-lstm-model-2104.04195</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-depression-severity-level-classification-using-a-multi-stage-dilated-cnn-lstm-model-2104.04195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-depression-severity-level-classification-using-a-multi-stage-dilated-cnn-lstm-model-2104.04195"/></url>
<url><loc>https://scifaro.com/en/abs/what-is-the-ground-truth-reliability-of-multi-annotator-data-for-audio-tagging-2104.04214</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-is-the-ground-truth-reliability-of-multi-annotator-data-for-audio-tagging-2104.04214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-is-the-ground-truth-reliability-of-multi-annotator-data-for-audio-tagging-2104.04214"/></url>
<url><loc>https://scifaro.com/en/abs/the-ntnu-taiwanese-asr-system-for-formosa-speech-recognition-challenge-2020-2104.04221</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ntnu-taiwanese-asr-system-for-formosa-speech-recognition-challenge-2020-2104.04221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ntnu-taiwanese-asr-system-for-formosa-speech-recognition-challenge-2020-2104.04221"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioned-target-speaker-extraction-based-on-customized-lstm-cells-2104.04234</loc><lastmod>2021-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioned-target-speaker-extraction-based-on-customized-lstm-cells-2104.04234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioned-target-speaker-extraction-based-on-customized-lstm-cells-2104.04234"/></url>
<url><loc>https://scifaro.com/en/abs/on-architectures-and-training-for-raw-waveform-feature-extraction-in-asr-2104.04298</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-architectures-and-training-for-raw-waveform-feature-extraction-in-asr-2104.04298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-architectures-and-training-for-raw-waveform-feature-extraction-in-asr-2104.04298"/></url>
<url><loc>https://scifaro.com/en/abs/accented-speech-recognition-inspired-by-human-perception-2104.04627</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accented-speech-recognition-inspired-by-human-perception-2104.04627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accented-speech-recognition-inspired-by-human-perception-2104.04627"/></url>
<url><loc>https://scifaro.com/en/abs/a-toolbox-for-construction-and-analysis-of-speech-datasets-2104.04896</loc><lastmod>2022-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-toolbox-for-construction-and-analysis-of-speech-datasets-2104.04896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-toolbox-for-construction-and-analysis-of-speech-datasets-2104.04896"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-system-description-for-the-interspeech-2021-auto-kws-challenge-2104.04993</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-system-description-for-the-interspeech-2021-auto-kws-challenge-2104.04993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-system-description-for-the-interspeech-2021-auto-kws-challenge-2104.04993"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-articulatory-movements-in-speech-production-with-transformer-networks-2104.05017</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-articulatory-movements-in-speech-production-with-transformer-networks-2104.05017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-articulatory-movements-in-speech-production-with-transformer-networks-2104.05017"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-binaural-rtf-vector-based-direction-of-arrival-estimation-methods-exploiting-an-external-microphone-2104.05079</loc><lastmod>2022-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-binaural-rtf-vector-based-direction-of-arrival-estimation-methods-exploiting-an-external-microphone-2104.05079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-binaural-rtf-vector-based-direction-of-arrival-estimation-methods-exploiting-an-external-microphone-2104.05079"/></url>
<url><loc>https://scifaro.com/en/abs/complex-spectral-mapping-with-attention-based-convolution-recurrent-neural-network-for-speech-enhancement-2104.05267</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-spectral-mapping-with-attention-based-convolution-recurrent-neural-network-for-speech-enhancement-2104.05267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-spectral-mapping-with-attention-based-convolution-recurrent-neural-network-for-speech-enhancement-2104.05267"/></url>
<url><loc>https://scifaro.com/en/abs/improved-conformer-based-end-to-end-speech-recognition-using-neural-architecture-search-2104.05390</loc><lastmod>2021-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-conformer-based-end-to-end-speech-recognition-using-neural-architecture-search-2104.05390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-conformer-based-end-to-end-speech-recognition-using-neural-architecture-search-2104.05390"/></url>
<url><loc>https://scifaro.com/en/abs/improvement-of-noise-robust-single-channel-voice-activity-detection-with-spatial-pre-processing-2104.05481</loc><lastmod>2021-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvement-of-noise-robust-single-channel-voice-activity-detection-with-spatial-pre-processing-2104.05481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvement-of-noise-robust-single-channel-voice-activity-detection-with-spatial-pre-processing-2104.05481"/></url>
<url><loc>https://scifaro.com/en/abs/l3das21-challenge-machine-learning-for-3d-audio-signal-processing-2104.05499</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l3das21-challenge-machine-learning-for-3d-audio-signal-processing-2104.05499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l3das21-challenge-machine-learning-for-3d-audio-signal-processing-2104.05499"/></url>
<url><loc>https://scifaro.com/en/abs/sc-glowtts-an-efficient-zero-shot-multi-speaker-text-to-speech-model-2104.05557</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sc-glowtts-an-efficient-zero-shot-multi-speaker-text-to-speech-model-2104.05557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sc-glowtts-an-efficient-zero-shot-multi-speaker-text-to-speech-model-2104.05557"/></url>
<url><loc>https://scifaro.com/en/abs/learning-metrics-from-mean-teacher-a-supervised-learning-method-for-improving-the-generalization-of-speaker-verification-system-2104.06604</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-metrics-from-mean-teacher-a-supervised-learning-method-for-improving-the-generalization-of-speaker-verification-system-2104.06604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-metrics-from-mean-teacher-a-supervised-learning-method-for-improving-the-generalization-of-speaker-verification-system-2104.06604"/></url>
<url><loc>https://scifaro.com/en/abs/audio-based-cough-counting-using-independent-subspace-analysis-2104.06798</loc><lastmod>2021-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-based-cough-counting-using-independent-subspace-analysis-2104.06798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-based-cough-counting-using-independent-subspace-analysis-2104.06798"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-f0-voice-conversion-based-on-dual-gan-with-convolutional-wavelet-kernels-2104.07283</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-f0-voice-conversion-based-on-dual-gan-with-convolutional-wavelet-kernels-2104.07283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-f0-voice-conversion-based-on-dual-gan-with-convolutional-wavelet-kernels-2104.07283"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-attentive-speech-emotion-recognition-2104.07288</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-attentive-speech-emotion-recognition-2104.07288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-attentive-speech-emotion-recognition-2104.07288"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-utility-of-multimodal-conversational-technology-and-audiovisual-analytic-measures-for-the-assessment-and-monitoring-of-amyotrophic-lateral-sclerosis-at-scale-2104.07310</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-utility-of-multimodal-conversational-technology-and-audiovisual-analytic-measures-for-the-assessment-and-monitoring-of-amyotrophic-lateral-sclerosis-at-scale-2104.07310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-utility-of-multimodal-conversational-technology-and-audiovisual-analytic-measures-for-the-assessment-and-monitoring-of-amyotrophic-lateral-sclerosis-at-scale-2104.07310"/></url>
<url><loc>https://scifaro.com/en/abs/envgan-adversarial-synthesis-of-environmental-sounds-for-data-augmentation-2104.07326</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/envgan-adversarial-synthesis-of-environmental-sounds-for-data-augmentation-2104.07326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/envgan-adversarial-synthesis-of-environmental-sounds-for-data-augmentation-2104.07326"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-independence-for-pretext-task-selection-in-self-supervised-speech-representation-learning-2104.07388</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-independence-for-pretext-task-selection-in-self-supervised-speech-representation-learning-2104.07388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-independence-for-pretext-task-selection-in-self-supervised-speech-representation-learning-2104.07388"/></url>
<url><loc>https://scifaro.com/en/abs/eat-enhanced-asr-tts-for-self-supervised-speech-recognition-2104.07474</loc><lastmod>2021-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eat-enhanced-asr-tts-for-self-supervised-speech-recognition-2104.07474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eat-enhanced-asr-tts-for-self-supervised-speech-recognition-2104.07474"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-keyword-spotting-by-capturing-long-range-interactions-with-temporal-lambda-networks-2104.08086</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-keyword-spotting-by-capturing-long-range-interactions-with-temporal-lambda-networks-2104.08086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-keyword-spotting-by-capturing-long-range-interactions-with-temporal-lambda-networks-2104.08086"/></url>
<url><loc>https://scifaro.com/en/abs/talknet-2-non-autoregressive-depth-wise-separable-convolutional-model-for-speech-synthesis-with-explicit-pitch-and-duration-prediction-2104.08189</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/talknet-2-non-autoregressive-depth-wise-separable-convolutional-model-for-speech-synthesis-with-explicit-pitch-and-duration-prediction-2104.08189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/talknet-2-non-autoregressive-depth-wise-separable-convolutional-model-for-speech-synthesis-with-explicit-pitch-and-duration-prediction-2104.08189"/></url>
<url><loc>https://scifaro.com/en/abs/kazakhtts-an-open-source-kazakh-text-to-speech-synthesis-dataset-2104.08459</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kazakhtts-an-open-source-kazakh-text-to-speech-synthesis-dataset-2104.08459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kazakhtts-an-open-source-kazakh-text-to-speech-synthesis-dataset-2104.08459"/></url>
<url><loc>https://scifaro.com/en/abs/multi-metric-optimization-using-generative-adversarial-networks-for-near-end-speech-intelligibility-enhancement-2104.08499</loc><lastmod>2021-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-metric-optimization-using-generative-adversarial-networks-for-near-end-speech-intelligibility-enhancement-2104.08499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-metric-optimization-using-generative-adversarial-networks-for-near-end-speech-intelligibility-enhancement-2104.08499"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-stroke-classification-of-tabla-accompaniment-in-hindustani-vocal-concert-audio-2104.09064</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-stroke-classification-of-tabla-accompaniment-in-hindustani-vocal-concert-audio-2104.09064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-stroke-classification-of-tabla-accompaniment-in-hindustani-vocal-concert-audio-2104.09064"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-cognitive-decline-using-speech-only-the-adresso-challenge-2104.09356</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-cognitive-decline-using-speech-only-the-adresso-challenge-2104.09356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-cognitive-decline-using-speech-only-the-adresso-challenge-2104.09356"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-representation-learning-with-path-integral-clustering-for-speaker-diarization-2104.09456</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-representation-learning-with-path-integral-clustering-for-speaker-diarization-2104.09456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-representation-learning-with-path-integral-clustering-for-speaker-diarization-2104.09456"/></url>
<url><loc>https://scifaro.com/en/abs/fusing-information-streams-in-end-to-end-audio-visual-speech-recognition-2104.09482</loc><lastmod>2021-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusing-information-streams-in-end-to-end-audio-visual-speech-recognition-2104.09482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusing-information-streams-in-end-to-end-audio-visual-speech-recognition-2104.09482"/></url>
<url><loc>https://scifaro.com/en/abs/nisqa-a-deep-cnn-self-attention-model-for-multidimensional-speech-quality-prediction-with-crowdsourced-datasets-2104.09494</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nisqa-a-deep-cnn-self-attention-model-for-multidimensional-speech-quality-prediction-with-crowdsourced-datasets-2104.09494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nisqa-a-deep-cnn-self-attention-model-for-multidimensional-speech-quality-prediction-with-crowdsourced-datasets-2104.09494"/></url>
<url><loc>https://scifaro.com/en/abs/robust-parameter-design-for-wiener-based-binaural-noise-reduction-methods-in-hearing-aids-2104.09615</loc><lastmod>2021-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-parameter-design-for-wiener-based-binaural-noise-reduction-methods-in-hearing-aids-2104.09615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-parameter-design-for-wiener-based-binaural-noise-reduction-methods-in-hearing-aids-2104.09615"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-remote-experiments-using-crowdsourcing-and-laboratory-experiments-on-speech-intelligibility-2104.10001</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-remote-experiments-using-crowdsourcing-and-laboratory-experiments-on-speech-intelligibility-2104.10001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-remote-experiments-using-crowdsourcing-and-laboratory-experiments-on-speech-intelligibility-2104.10001"/></url>
<url><loc>https://scifaro.com/en/abs/bias-aware-loss-for-training-image-and-speech-quality-prediction-models-from-multiple-datasets-2104.10217</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bias-aware-loss-for-training-image-and-speech-quality-prediction-models-from-multiple-datasets-2104.10217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bias-aware-loss-for-training-image-and-speech-quality-prediction-models-from-multiple-datasets-2104.10217"/></url>
<url><loc>https://scifaro.com/en/abs/label-synchronous-speech-to-text-alignment-for-asr-using-forward-and-backward-transformers-2104.10328</loc><lastmod>2021-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-synchronous-speech-to-text-alignment-for-asr-using-forward-and-backward-transformers-2104.10328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-synchronous-speech-to-text-alignment-for-asr-using-forward-and-backward-transformers-2104.10328"/></url>
<url><loc>https://scifaro.com/en/abs/scene-aware-far-field-automatic-speech-recognition-2104.10757</loc><lastmod>2021-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scene-aware-far-field-automatic-speech-recognition-2104.10757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scene-aware-far-field-automatic-speech-recognition-2104.10757"/></url>
<url><loc>https://scifaro.com/en/abs/hmm-free-encoder-pre-training-for-streaming-rnn-transducer-2104.10764</loc><lastmod>2021-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hmm-free-encoder-pre-training-for-streaming-rnn-transducer-2104.10764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hmm-free-encoder-pre-training-for-streaming-rnn-transducer-2104.10764"/></url>
<url><loc>https://scifaro.com/en/abs/building-bilingual-and-code-switched-voice-conversion-with-limited-training-data-using-embedding-consistency-loss-2104.10832</loc><lastmod>2021-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-bilingual-and-code-switched-voice-conversion-with-limited-training-data-using-embedding-consistency-loss-2104.10832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-bilingual-and-code-switched-voice-conversion-with-limited-training-data-using-embedding-consistency-loss-2104.10832"/></url>
<url><loc>https://scifaro.com/en/abs/nonlinear-spatial-filtering-in-multichannel-speech-enhancement-2104.11033</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonlinear-spatial-filtering-in-multichannel-speech-enhancement-2104.11033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonlinear-spatial-filtering-in-multichannel-speech-enhancement-2104.11033"/></url>
<url><loc>https://scifaro.com/en/abs/voice-privacy-with-smart-digital-assistants-in-educational-settings-2104.11038</loc><lastmod>2021-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-privacy-with-smart-digital-assistants-in-educational-settings-2104.11038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-privacy-with-smart-digital-assistants-in-educational-settings-2104.11038"/></url>
<url><loc>https://scifaro.com/en/abs/language-id-prediction-from-speech-using-self-attentive-pooling-and-1d-convolutions-2104.11985</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-id-prediction-from-speech-using-self-attentive-pooling-and-1d-convolutions-2104.11985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-id-prediction-from-speech-using-self-attentive-pooling-and-1d-convolutions-2104.11985"/></url>
<url><loc>https://scifaro.com/en/abs/phrase-break-prediction-with-bidirectional-encoder-representations-in-japanese-text-to-speech-synthesis-2104.12395</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phrase-break-prediction-with-bidirectional-encoder-representations-in-japanese-text-to-speech-synthesis-2104.12395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phrase-break-prediction-with-bidirectional-encoder-representations-in-japanese-text-to-speech-synthesis-2104.12395"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-data-augmentation-for-end-to-end-mandarin-speech-recognition-2104.12521</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-data-augmentation-for-end-to-end-mandarin-speech-recognition-2104.12521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-data-augmentation-for-end-to-end-mandarin-speech-recognition-2104.12521"/></url>
<url><loc>https://scifaro.com/en/abs/head-synchronous-decoding-for-transformer-based-streaming-asr-2104.12631</loc><lastmod>2021-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/head-synchronous-decoding-for-transformer-based-streaming-asr-2104.12631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/head-synchronous-decoding-for-transformer-based-streaming-asr-2104.12631"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-for-end-to-end-asr-word-and-utterance-confidence-with-deletion-prediction-2104.12870</loc><lastmod>2021-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-for-end-to-end-asr-word-and-utterance-confidence-with-deletion-prediction-2104.12870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-for-end-to-end-asr-word-and-utterance-confidence-with-deletion-prediction-2104.12870"/></url>
<url><loc>https://scifaro.com/en/abs/visualization-of-linear-operations-in-the-spherical-harmonics-domain-2104.13069</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualization-of-linear-operations-in-the-spherical-harmonics-domain-2104.13069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualization-of-linear-operations-in-the-spherical-harmonics-domain-2104.13069"/></url>
<url><loc>https://scifaro.com/en/abs/dechorate-a-calibrated-room-impulse-response-database-for-echo-aware-signal-processing-2104.13168</loc><lastmod>2021-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dechorate-a-calibrated-room-impulse-response-database-for-echo-aware-signal-processing-2104.13168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dechorate-a-calibrated-room-impulse-response-database-for-echo-aware-signal-processing-2104.13168"/></url>
<url><loc>https://scifaro.com/en/abs/iatos-ai-powered-pre-screening-tool-for-covid-19-from-cough-audio-samples-2104.13247</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iatos-ai-powered-pre-screening-tool-for-covid-19-from-cough-audio-samples-2104.13247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iatos-ai-powered-pre-screening-tool-for-covid-19-from-cough-audio-samples-2104.13247"/></url>
<url><loc>https://scifaro.com/en/abs/beamlearning-an-end-to-end-deep-learning-approach-for-the-angular-localization-of-sound-sources-using-raw-multichannel-acoustic-pressure-data-2104.13347</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beamlearning-an-end-to-end-deep-learning-approach-for-the-angular-localization-of-sound-sources-using-raw-multichannel-acoustic-pressure-data-2104.13347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beamlearning-an-end-to-end-deep-learning-approach-for-the-angular-localization-of-sound-sources-using-raw-multichannel-acoustic-pressure-data-2104.13347"/></url>
<url><loc>https://scifaro.com/en/abs/dasee-a-synthetic-database-of-domestic-acoustic-scenes-and-events-in-dementia-patients-environment-2104.13423</loc><lastmod>2021-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dasee-a-synthetic-database-of-domestic-acoustic-scenes-and-events-in-dementia-patients-environment-2104.13423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dasee-a-synthetic-database-of-domestic-acoustic-scenes-and-events-in-dementia-patients-environment-2104.13423"/></url>
<url><loc>https://scifaro.com/en/abs/amss-net-audio-manipulation-on-user-specified-sources-with-textual-queries-2104.13553</loc><lastmod>2021-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amss-net-audio-manipulation-on-user-specified-sources-with-textual-queries-2104.13553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amss-net-audio-manipulation-on-user-specified-sources-with-textual-queries-2104.13553"/></url>
<url><loc>https://scifaro.com/en/abs/idmt-traffic-an-open-benchmark-dataset-for-acoustic-traffic-monitoring-research-2104.13620</loc><lastmod>2021-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/idmt-traffic-an-open-benchmark-dataset-for-acoustic-traffic-monitoring-research-2104.13620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/idmt-traffic-an-open-benchmark-dataset-for-acoustic-traffic-monitoring-research-2104.13620"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-keyphrase-detection-using-speaker-and-environment-information-2104.13970</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-keyphrase-detection-using-speaker-and-environment-information-2104.13970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-keyphrase-detection-using-speaker-and-environment-information-2104.13970"/></url>
<url><loc>https://scifaro.com/en/abs/hardware-friendly-synaptic-orders-and-timescales-in-liquid-state-machines-for-speech-classification-2104.14264</loc><lastmod>2021-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hardware-friendly-synaptic-orders-and-timescales-in-liquid-state-machines-for-speech-classification-2104.14264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hardware-friendly-synaptic-orders-and-timescales-in-liquid-state-machines-for-speech-classification-2104.14264"/></url>
<url><loc>https://scifaro.com/en/abs/deformable-tdnn-with-adaptive-receptive-fields-for-speech-recognition-2104.14791</loc><lastmod>2021-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deformable-tdnn-with-adaptive-receptive-fields-for-speech-recognition-2104.14791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deformable-tdnn-with-adaptive-receptive-fields-for-speech-recognition-2104.14791"/></url>
<url><loc>https://scifaro.com/en/abs/crackle-detection-in-lung-sounds-using-transfer-learning-and-multi-input-convolitional-neural-networks-2104.14921</loc><lastmod>2021-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crackle-detection-in-lung-sounds-using-transfer-learning-and-multi-input-convolitional-neural-networks-2104.14921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crackle-detection-in-lung-sounds-using-transfer-learning-and-multi-input-convolitional-neural-networks-2104.14921"/></url>
<url><loc>https://scifaro.com/en/abs/full-reference-speech-quality-estimation-with-attentional-siamese-neural-networks-2105.00783</loc><lastmod>2021-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-reference-speech-quality-estimation-with-attentional-siamese-neural-networks-2105.00783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-reference-speech-quality-estimation-with-attentional-siamese-neural-networks-2105.00783"/></url>
<url><loc>https://scifaro.com/en/abs/on-addressing-practical-challenges-for-rnn-transducer-2105.00858</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-addressing-practical-challenges-for-rnn-transducer-2105.00858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-addressing-practical-challenges-for-rnn-transducer-2105.00858"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-and-maximizing-the-benefits-of-back-end-noise-adaption-on-attention-based-speech-recognition-models-2105.01134</loc><lastmod>2021-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-and-maximizing-the-benefits-of-back-end-noise-adaption-on-attention-based-speech-recognition-models-2105.01134"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-and-maximizing-the-benefits-of-back-end-noise-adaption-on-attention-based-speech-recognition-models-2105.01134"/></url>
<url><loc>https://scifaro.com/en/abs/speech-decomposition-based-on-a-hybrid-speech-model-and-optimal-segmentation-2105.01302</loc><lastmod>2021-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-decomposition-based-on-a-hybrid-speech-model-and-optimal-segmentation-2105.01302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-decomposition-based-on-a-hybrid-speech-model-and-optimal-segmentation-2105.01302"/></url>
<url><loc>https://scifaro.com/en/abs/performance-evaluation-of-deep-convolutional-maxout-neural-network-in-speech-recognition-2105.01399</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-evaluation-of-deep-convolutional-maxout-neural-network-in-speech-recognition-2105.01399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-evaluation-of-deep-convolutional-maxout-neural-network-in-speech-recognition-2105.01399"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-disentanglement-with-multilingual-and-monolingual-vq-vae-2105.01573</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-disentanglement-with-multilingual-and-monolingual-vq-vae-2105.01573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-disentanglement-with-multilingual-and-monolingual-vq-vae-2105.01573"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-based-speaker-normalization-for-acoustic-unit-discovery-2105.01786</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-based-speaker-normalization-for-acoustic-unit-discovery-2105.01786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-based-speaker-normalization-for-acoustic-unit-discovery-2105.01786"/></url>
<url><loc>https://scifaro.com/en/abs/accent-recognition-with-hybrid-phonetic-features-2105.01920</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-recognition-with-hybrid-phonetic-features-2105.01920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-recognition-with-hybrid-phonetic-features-2105.01920"/></url>
<url><loc>https://scifaro.com/en/abs/towards-interpretable-and-transferable-speech-emotion-recognition-latent-representation-based-analysis-of-features-methods-and-corpora-2105.02055</loc><lastmod>2021-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-interpretable-and-transferable-speech-emotion-recognition-latent-representation-based-analysis-of-features-methods-and-corpora-2105.02055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-interpretable-and-transferable-speech-emotion-recognition-latent-representation-based-analysis-of-features-methods-and-corpora-2105.02055"/></url>
<url><loc>https://scifaro.com/en/abs/diffsinger-singing-voice-synthesis-via-shallow-diffusion-mechanism-2105.02446</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsinger-singing-voice-synthesis-via-shallow-diffusion-mechanism-2105.02446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsinger-singing-voice-synthesis-via-shallow-diffusion-mechanism-2105.02446"/></url>
<url><loc>https://scifaro.com/en/abs/point-cloud-audio-processing-2105.02469</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/point-cloud-audio-processing-2105.02469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/point-cloud-audio-processing-2105.02469"/></url>
<url><loc>https://scifaro.com/en/abs/usm-sed-a-dataset-for-polyphonic-sound-event-detection-in-urban-sound-monitoring-scenarios-2105.02592</loc><lastmod>2021-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usm-sed-a-dataset-for-polyphonic-sound-event-detection-in-urban-sound-monitoring-scenarios-2105.02592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usm-sed-a-dataset-for-polyphonic-sound-event-detection-in-urban-sound-monitoring-scenarios-2105.02592"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-source-specific-sound-level-estimation-in-noisy-soundscapes-2105.02911</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-source-specific-sound-level-estimation-in-noisy-soundscapes-2105.02911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-source-specific-sound-level-estimation-in-noisy-soundscapes-2105.02911"/></url>
<url><loc>https://scifaro.com/en/abs/online-acoustic-system-identification-exploiting-kalman-filtering-and-an-adaptive-impulse-response-subspace-model-2105.03337</loc><lastmod>2021-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-acoustic-system-identification-exploiting-kalman-filtering-and-an-adaptive-impulse-response-subspace-model-2105.03337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-acoustic-system-identification-exploiting-kalman-filtering-and-an-adaptive-impulse-response-subspace-model-2105.03337"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-personalized-speech-enhancement-through-speaker-informed-model-selection-2105.03542</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-personalized-speech-enhancement-through-speaker-informed-model-selection-2105.03542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-personalized-speech-enhancement-through-speaker-informed-model-selection-2105.03542"/></url>
<url><loc>https://scifaro.com/en/abs/test-time-adaptation-toward-personalized-speech-enhancement-zero-shot-learning-with-knowledge-distillation-2105.03544</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/test-time-adaptation-toward-personalized-speech-enhancement-zero-shot-learning-with-knowledge-distillation-2105.03544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/test-time-adaptation-toward-personalized-speech-enhancement-zero-shot-learning-with-knowledge-distillation-2105.03544"/></url>
<url><loc>https://scifaro.com/en/abs/domestic-activities-clustering-from-audio-recordings-using-convolutional-capsule-autoencoder-network-2105.03583</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domestic-activities-clustering-from-audio-recordings-using-convolutional-capsule-autoencoder-network-2105.03583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domestic-activities-clustering-from-audio-recordings-using-convolutional-capsule-autoencoder-network-2105.03583"/></url>
<url><loc>https://scifaro.com/en/abs/latency-controlled-neural-architecture-search-for-streaming-speech-recognition-2105.03643</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latency-controlled-neural-architecture-search-for-streaming-speech-recognition-2105.03643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latency-controlled-neural-architecture-search-for-streaming-speech-recognition-2105.03643"/></url>
<url><loc>https://scifaro.com/en/abs/study-on-the-temporal-pooling-used-in-deep-neural-networks-for-speaker-verification-2105.04310</loc><lastmod>2021-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-on-the-temporal-pooling-used-in-deep-neural-networks-for-speaker-verification-2105.04310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-on-the-temporal-pooling-used-in-deep-neural-networks-for-speaker-verification-2105.04310"/></url>
<url><loc>https://scifaro.com/en/abs/cross-corpora-language-recognition-a-preliminary-investigation-with-indian-languages-2105.04639</loc><lastmod>2021-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-corpora-language-recognition-a-preliminary-investigation-with-indian-languages-2105.04639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-corpora-language-recognition-a-preliminary-investigation-with-indian-languages-2105.04639"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-signal-processing-with-black-box-audio-effects-2105.04752</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-signal-processing-with-black-box-audio-effects-2105.04752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-signal-processing-with-black-box-audio-effects-2105.04752"/></url>
<url><loc>https://scifaro.com/en/abs/deep-scattering-network-for-speech-emotion-recognition-2105.04806</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-scattering-network-for-speech-emotion-recognition-2105.04806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-scattering-network-for-speech-emotion-recognition-2105.04806"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-streaming-asr-model-delay-with-self-alignment-2105.05005</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-streaming-asr-model-delay-with-self-alignment-2105.05005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-streaming-asr-model-delay-with-self-alignment-2105.05005"/></url>
<url><loc>https://scifaro.com/en/abs/english-accent-accuracy-analysis-in-a-state-of-the-art-automatic-speech-recognition-system-2105.05041</loc><lastmod>2021-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/english-accent-accuracy-analysis-in-a-state-of-the-art-automatic-speech-recognition-system-2105.05041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/english-accent-accuracy-analysis-in-a-state-of-the-art-automatic-speech-recognition-system-2105.05041"/></url>
<url><loc>https://scifaro.com/en/abs/stutternet-stuttering-detection-using-time-delay-neural-network-2105.05599</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stutternet-stuttering-detection-using-time-delay-neural-network-2105.05599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stutternet-stuttering-detection-using-time-delay-neural-network-2105.05599"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-neural-beamforming-layers-for-multi-channel-speech-recognition-2105.05920</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-neural-beamforming-layers-for-multi-channel-speech-recognition-2105.05920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-neural-beamforming-layers-for-multi-channel-speech-recognition-2105.05920"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-transformer-for-hardware-efficient-voice-trigger-detection-and-false-trigger-mitigation-2105.06598</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-transformer-for-hardware-efficient-voice-trigger-detection-and-false-trigger-mitigation-2105.06598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-transformer-for-hardware-efficient-voice-trigger-detection-and-false-trigger-mitigation-2105.06598"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-speech-intelligibility-from-eeg-in-a-non-linear-classification-paradigm-2105.06844</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-speech-intelligibility-from-eeg-in-a-non-linear-classification-paradigm-2105.06844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-speech-intelligibility-from-eeg-in-a-non-linear-classification-paradigm-2105.06844"/></url>
<url><loc>https://scifaro.com/en/abs/sound-pressure-minimization-at-the-ear-drum-for-in-ear-anc-headphones-using-a-fixed-feedforward-remote-microphone-technique-2105.06894</loc><lastmod>2021-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-pressure-minimization-at-the-ear-drum-for-in-ear-anc-headphones-using-a-fixed-feedforward-remote-microphone-technique-2105.06894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-pressure-minimization-at-the-ear-drum-for-in-ear-anc-headphones-using-a-fixed-feedforward-remote-microphone-technique-2105.06894"/></url>
<url><loc>https://scifaro.com/en/abs/listen-with-intent-improving-speech-recognition-with-audio-to-intent-front-end-2105.07071</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-with-intent-improving-speech-recognition-with-audio-to-intent-front-end-2105.07071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-with-intent-improving-speech-recognition-with-audio-to-intent-front-end-2105.07071"/></url>
<url><loc>https://scifaro.com/en/abs/x-vectors-with-multi-scale-aggregation-for-speaker-diarization-2105.07367</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-vectors-with-multi-scale-aggregation-for-speaker-diarization-2105.07367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-vectors-with-multi-scale-aggregation-for-speaker-diarization-2105.07367"/></url>
<url><loc>https://scifaro.com/en/abs/dual-stage-low-complexity-reconfigurable-speech-enhancement-2105.07632</loc><lastmod>2021-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-stage-low-complexity-reconfigurable-speech-enhancement-2105.07632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-stage-low-complexity-reconfigurable-speech-enhancement-2105.07632"/></url>
<url><loc>https://scifaro.com/en/abs/a-time-domain-nearfield-frequency-invariant-beamforming-method-2105.08219</loc><lastmod>2021-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-time-domain-nearfield-frequency-invariant-beamforming-method-2105.08219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-time-domain-nearfield-frequency-invariant-beamforming-method-2105.08219"/></url>
<url><loc>https://scifaro.com/en/abs/deep-correlation-analysis-for-audio-eeg-decoding-2105.08492</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-correlation-analysis-for-audio-eeg-decoding-2105.08492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-correlation-analysis-for-audio-eeg-decoding-2105.08492"/></url>
<url><loc>https://scifaro.com/en/abs/handling-structural-mismatches-in-real-time-opera-tracking-2105.08531</loc><lastmod>2021-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/handling-structural-mismatches-in-real-time-opera-tracking-2105.08531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/handling-structural-mismatches-in-real-time-opera-tracking-2105.08531"/></url>
<url><loc>https://scifaro.com/en/abs/disentanglement-learning-for-variational-autoencoders-applied-to-audio-visual-speech-enhancement-2105.08970</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentanglement-learning-for-variational-autoencoders-applied-to-audio-visual-speech-enhancement-2105.08970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentanglement-learning-for-variational-autoencoders-applied-to-audio-visual-speech-enhancement-2105.08970"/></url>
<url><loc>https://scifaro.com/en/abs/advances-in-integration-of-end-to-end-neural-and-clustering-based-diarization-for-real-conversational-speech-2105.09040</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advances-in-integration-of-end-to-end-neural-and-clustering-based-diarization-for-real-conversational-speech-2105.09040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advances-in-integration-of-end-to-end-neural-and-clustering-based-diarization-for-real-conversational-speech-2105.09040"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-disentanglement-in-video-to-speech-conversion-2105.09652</loc><lastmod>2021-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-disentanglement-in-video-to-speech-conversion-2105.09652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-disentanglement-in-video-to-speech-conversion-2105.09652"/></url>
<url><loc>https://scifaro.com/en/abs/aluminum-nitride-two-dimensional-resonant-rods-2105.11232</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aluminum-nitride-two-dimensional-resonant-rods-2105.11232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aluminum-nitride-two-dimensional-resonant-rods-2105.11232"/></url>
<url><loc>https://scifaro.com/en/abs/training-speech-enhancement-systems-with-noisy-speech-datasets-2105.12315</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-speech-enhancement-systems-with-noisy-speech-datasets-2105.12315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-speech-enhancement-systems-with-noisy-speech-datasets-2105.12315"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-temporal-dependencies-for-cross-modal-music-piece-identification-2105.12536</loc><lastmod>2021-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-temporal-dependencies-for-cross-modal-music-piece-identification-2105.12536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-temporal-dependencies-for-cross-modal-music-piece-identification-2105.12536"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-measure-of-musical-noise-based-on-spectral-kurtosis-2105.13079</loc><lastmod>2021-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-measure-of-musical-noise-based-on-spectral-kurtosis-2105.13079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-measure-of-musical-noise-based-on-spectral-kurtosis-2105.13079"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-scene-classification-analysis-of-dcase-2021-challenge-submissions-2105.13675</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-scene-classification-analysis-of-dcase-2021-challenge-submissions-2105.13675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-scene-classification-analysis-of-dcase-2021-challenge-submissions-2105.13675"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-for-multi-device-audio-analysis-of-dcase-2021-challenge-systems-2105.13734</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-for-multi-device-audio-analysis-of-dcase-2021-challenge-systems-2105.13734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-for-multi-device-audio-analysis-of-dcase-2021-challenge-systems-2105.13734"/></url>
<url><loc>https://scifaro.com/en/abs/control-architecture-of-the-double-cross-correlation-processor-for-sampling-rate-offset-estimation-in-acoustic-sensor-networks-2105.13743</loc><lastmod>2021-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/control-architecture-of-the-double-cross-correlation-processor-for-sampling-rate-offset-estimation-in-acoustic-sensor-networks-2105.13743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/control-architecture-of-the-double-cross-correlation-processor-for-sampling-rate-offset-estimation-in-acoustic-sensor-networks-2105.13743"/></url>
<url><loc>https://scifaro.com/en/abs/diffsvc-a-diffusion-probabilistic-model-for-singing-voice-conversion-2105.13871</loc><lastmod>2021-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffsvc-a-diffusion-probabilistic-model-for-singing-voice-conversion-2105.13871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffsvc-a-diffusion-probabilistic-model-for-singing-voice-conversion-2105.13871"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-the-intelligibility-of-vocoded-speech-using-a-remote-testing-framework-2105.14120</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-the-intelligibility-of-vocoded-speech-using-a-remote-testing-framework-2105.14120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-the-intelligibility-of-vocoded-speech-using-a-remote-testing-framework-2105.14120"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-based-ratio-mask-estimation-for-reverberant-speech-enhancement-in-cochlear-implant-processors-2105.14135</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-based-ratio-mask-estimation-for-reverberant-speech-enhancement-in-cochlear-implant-processors-2105.14135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-based-ratio-mask-estimation-for-reverberant-speech-enhancement-in-cochlear-implant-processors-2105.14135"/></url>
<url><loc>https://scifaro.com/en/abs/dplm-a-deep-perceptual-spatial-audio-localization-metric-2105.14180</loc><lastmod>2021-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dplm-a-deep-perceptual-spatial-audio-localization-metric-2105.14180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dplm-a-deep-perceptual-spatial-audio-localization-metric-2105.14180"/></url>
<url><loc>https://scifaro.com/en/abs/parkinsonian-chinese-speech-analysis-towards-automatic-classification-of-parkinson-s-disease-2105.14704</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parkinsonian-chinese-speech-analysis-towards-automatic-classification-of-parkinson-s-disease-2105.14704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parkinsonian-chinese-speech-analysis-towards-automatic-classification-of-parkinson-s-disease-2105.14704"/></url>
<url><loc>https://scifaro.com/en/abs/pf-net-personalized-filter-for-speaker-recognition-from-raw-waveform-2105.14826</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pf-net-personalized-filter-for-speaker-recognition-from-raw-waveform-2105.14826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pf-net-personalized-filter-for-speaker-recognition-from-raw-waveform-2105.14826"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-audiovisual-synchronisation-for-ultrasound-tongue-imaging-2105.15162</loc><lastmod>2021-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-audiovisual-synchronisation-for-ultrasound-tongue-imaging-2105.15162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-audiovisual-synchronisation-for-ultrasound-tongue-imaging-2105.15162"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-zsvc-towards-zero-shot-voice-conversion-in-low-resource-contexts-2106.00043</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-zsvc-towards-zero-shot-voice-conversion-in-low-resource-contexts-2106.00043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-zsvc-towards-zero-shot-voice-conversion-in-low-resource-contexts-2106.00043"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-spoken-language-identification-using-self-attentive-pooling-and-deep-1d-time-channel-separable-convolutions-2106.00052</loc><lastmod>2021-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-spoken-language-identification-using-self-attentive-pooling-and-deep-1d-time-channel-separable-convolutions-2106.00052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-spoken-language-identification-using-self-attentive-pooling-and-deep-1d-time-channel-separable-convolutions-2106.00052"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-speech-representation-learning-for-parkinson-s-disease-classification-2106.00531</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-speech-representation-learning-for-parkinson-s-disease-classification-2106.00531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-speech-representation-learning-for-parkinson-s-disease-classification-2106.00531"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-point-of-care-diagnostics-for-covid-19-based-on-acoustics-and-symptoms-2106.00639</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-point-of-care-diagnostics-for-covid-19-based-on-acoustics-and-symptoms-2106.00639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-point-of-care-diagnostics-for-covid-19-based-on-acoustics-and-symptoms-2106.00639"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-efficient-and-semantic-mixture-invariant-training-taming-in-the-wild-unsupervised-sound-separation-2106.00847</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-efficient-and-semantic-mixture-invariant-training-taming-in-the-wild-unsupervised-sound-separation-2106.00847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-efficient-and-semantic-mixture-invariant-training-taming-in-the-wild-unsupervised-sound-separation-2106.00847"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-acoustic-echo-canceller-optimized-using-an-automatic-speech-recognizer-and-large-scale-synthetic-data-2106.00856</loc><lastmod>2021-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-acoustic-echo-canceller-optimized-using-an-automatic-speech-recognizer-and-large-scale-synthetic-data-2106.00856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-acoustic-echo-canceller-optimized-using-an-automatic-speech-recognizer-and-large-scale-synthetic-data-2106.00856"/></url>
<url><loc>https://scifaro.com/en/abs/should-we-always-separate-switching-between-enhanced-and-observed-signals-for-overlapping-speech-recognition-2106.00949</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/should-we-always-separate-switching-between-enhanced-and-observed-signals-for-overlapping-speech-recognition-2106.00949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/should-we-always-separate-switching-between-enhanced-and-observed-signals-for-overlapping-speech-recognition-2106.00949"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-frequency-domain-adaptive-system-identification-2106.01262</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-frequency-domain-adaptive-system-identification-2106.01262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-frequency-domain-adaptive-system-identification-2106.01262"/></url>
<url><loc>https://scifaro.com/en/abs/dual-script-e2e-framework-for-multilingual-and-code-switching-asr-2106.01400</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-script-e2e-framework-for-multilingual-and-code-switching-asr-2106.01400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-script-e2e-framework-for-multilingual-and-code-switching-asr-2106.01400"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-derived-loss-and-data-augmentation-for-dnn-based-multispeaker-speech-synthesis-2106.01789</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-derived-loss-and-data-augmentation-for-dnn-based-multispeaker-speech-synthesis-2106.01789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-derived-loss-and-data-augmentation-for-dnn-based-multispeaker-speech-synthesis-2106.01789"/></url>
<url><loc>https://scifaro.com/en/abs/an-objective-evaluation-of-the-effects-of-recording-conditions-and-speaker-characteristics-in-multi-speaker-deep-neural-speech-synthesis-2106.01812</loc><lastmod>2021-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-objective-evaluation-of-the-effects-of-recording-conditions-and-speaker-characteristics-in-multi-speaker-deep-neural-speech-synthesis-2106.01812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-objective-evaluation-of-the-effects-of-recording-conditions-and-speaker-characteristics-in-multi-speaker-deep-neural-speech-synthesis-2106.01812"/></url>
<url><loc>https://scifaro.com/en/abs/joint-multi-channel-dereverberation-and-noise-reduction-using-a-unified-convolutional-beamformer-with-sparse-priors-2106.01902</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-multi-channel-dereverberation-and-noise-reduction-using-a-unified-convolutional-beamformer-with-sparse-priors-2106.01902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-multi-channel-dereverberation-and-noise-reduction-using-a-unified-convolutional-beamformer-with-sparse-priors-2106.01902"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-model-for-voicing-silent-speech-2106.01933</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-model-for-voicing-silent-speech-2106.01933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-model-for-voicing-silent-speech-2106.01933"/></url>
<url><loc>https://scifaro.com/en/abs/segmental-contrastive-predictive-coding-for-unsupervised-word-segmentation-2106.02170</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segmental-contrastive-predictive-coding-for-unsupervised-word-segmentation-2106.02170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segmental-contrastive-predictive-coding-for-unsupervised-word-segmentation-2106.02170"/></url>
<url><loc>https://scifaro.com/en/abs/fre-gan-adversarial-frequency-consistent-audio-synthesis-2106.02297</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fre-gan-adversarial-frequency-consistent-audio-synthesis-2106.02297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fre-gan-adversarial-frequency-consistent-audio-synthesis-2106.02297"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-word-error-rate-training-with-language-model-fusion-for-end-to-end-speech-recognition-2106.02302</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-word-error-rate-training-with-language-model-fusion-for-end-to-end-speech-recognition-2106.02302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-word-error-rate-training-with-language-model-fusion-for-end-to-end-speech-recognition-2106.02302"/></url>
<url><loc>https://scifaro.com/en/abs/manifold-aware-deep-clustering-maximizing-angles-between-embedding-vectors-based-on-regular-simplex-2106.02331</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manifold-aware-deep-clustering-maximizing-angles-between-embedding-vectors-based-on-regular-simplex-2106.02331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manifold-aware-deep-clustering-maximizing-angles-between-embedding-vectors-based-on-regular-simplex-2106.02331"/></url>
<url><loc>https://scifaro.com/en/abs/a-residual-network-based-deep-learning-model-for-detection-of-covid-19-from-cough-sounds-2106.02348</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-residual-network-based-deep-learning-model-for-detection-of-covid-19-from-cough-sounds-2106.02348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-residual-network-based-deep-learning-model-for-detection-of-covid-19-from-cough-sounds-2106.02348"/></url>
<url><loc>https://scifaro.com/en/abs/toyadmos2-another-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-under-domain-shift-conditions-2106.02369</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toyadmos2-another-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-under-domain-shift-conditions-2106.02369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toyadmos2-another-dataset-of-miniature-machine-operating-sounds-for-anomalous-sound-detection-under-domain-shift-conditions-2106.02369"/></url>
<url><loc>https://scifaro.com/en/abs/approximate-fixed-points-in-recurrent-neural-networks-2106.02417</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/approximate-fixed-points-in-recurrent-neural-networks-2106.02417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/approximate-fixed-points-in-recurrent-neural-networks-2106.02417"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-audio-segments-in-call-center-recordings-using-convolutional-recurrent-neural-networks-2106.02422</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-audio-segments-in-call-center-recordings-using-convolutional-recurrent-neural-networks-2106.02422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-audio-segments-in-call-center-recordings-using-convolutional-recurrent-neural-networks-2106.02422"/></url>
<url><loc>https://scifaro.com/en/abs/teaching-keyword-spotters-to-spot-new-keywords-with-limited-examples-2106.02443</loc><lastmod>2021-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teaching-keyword-spotters-to-spot-new-keywords-with-limited-examples-2106.02443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teaching-keyword-spotters-to-spot-new-keywords-with-limited-examples-2106.02443"/></url>
<url><loc>https://scifaro.com/en/abs/do-you-listen-with-one-or-two-microphones-a-unified-asr-model-for-single-and-multi-channel-audio-2106.02750</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-you-listen-with-one-or-two-microphones-a-unified-asr-model-for-single-and-multi-channel-audio-2106.02750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-you-listen-with-one-or-two-microphones-a-unified-asr-model-for-single-and-multi-channel-audio-2106.02750"/></url>
<url><loc>https://scifaro.com/en/abs/an-attribute-aligned-strategy-for-learning-speech-representation-2106.02810</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attribute-aligned-strategy-for-learning-speech-representation-2106.02810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attribute-aligned-strategy-for-learning-speech-representation-2106.02810"/></url>
<url><loc>https://scifaro.com/en/abs/reinforce-aligner-reinforcement-alignment-search-for-robust-end-to-end-text-to-speech-2106.02830</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reinforce-aligner-reinforcement-alignment-search-for-robust-end-to-end-text-to-speech-2106.02830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reinforce-aligner-reinforcement-alignment-search-for-robust-end-to-end-text-to-speech-2106.02830"/></url>
<url><loc>https://scifaro.com/en/abs/human-listening-and-live-captioning-multi-task-training-for-speech-enhancement-2106.02896</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-listening-and-live-captioning-multi-task-training-for-speech-enhancement-2106.02896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-listening-and-live-captioning-multi-task-training-for-speech-enhancement-2106.02896"/></url>
<url><loc>https://scifaro.com/en/abs/rtneural-fast-neural-inferencing-for-real-time-systems-2106.03037</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rtneural-fast-neural-inferencing-for-real-time-systems-2106.03037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rtneural-fast-neural-inferencing-for-real-time-systems-2106.03037"/></url>
<url><loc>https://scifaro.com/en/abs/improving-channel-decorrelation-for-multi-channel-target-speech-extraction-2106.03113</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-channel-decorrelation-for-multi-channel-target-speech-extraction-2106.03113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-channel-decorrelation-for-multi-channel-target-speech-extraction-2106.03113"/></url>
<url><loc>https://scifaro.com/en/abs/meta-stylespeech-multi-speaker-adaptive-text-to-speech-generation-2106.03153</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-stylespeech-multi-speaker-adaptive-text-to-speech-generation-2106.03153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-stylespeech-multi-speaker-adaptive-text-to-speech-generation-2106.03153"/></url>
<url><loc>https://scifaro.com/en/abs/mathematical-vocoder-algorithm-modified-spectral-inversion-for-efficient-neural-speech-synthesis-2106.03167</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mathematical-vocoder-algorithm-modified-spectral-inversion-for-efficient-neural-speech-synthesis-2106.03167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mathematical-vocoder-algorithm-modified-spectral-inversion-for-efficient-neural-speech-synthesis-2106.03167"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-methods-for-end-to-end-speech-recognition-on-distant-talk-scenarios-2106.03419</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-methods-for-end-to-end-speech-recognition-on-distant-talk-scenarios-2106.03419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-methods-for-end-to-end-speech-recognition-on-distant-talk-scenarios-2106.03419"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-word-level-pronunciation-error-detection-in-non-native-english-speech-2106.03494</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-word-level-pronunciation-error-detection-in-non-native-english-speech-2106.03494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-word-level-pronunciation-error-detection-in-non-native-english-speech-2106.03494"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-clustered-federated-learning-in-complex-multi-source-acoustic-environments-2106.03671</loc><lastmod>2021-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-clustered-federated-learning-in-complex-multi-source-acoustic-environments-2106.03671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-clustered-federated-learning-in-complex-multi-source-acoustic-environments-2106.03671"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speaker-diarization-conditioned-on-speech-activity-and-overlap-detection-2106.04078</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-conditioned-on-speech-activity-and-overlap-detection-2106.04078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speaker-diarization-conditioned-on-speech-activity-and-overlap-detection-2106.04078"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-percepnet-real-time-low-complexity-target-voice-separation-and-enhancement-2106.04129</loc><lastmod>2021-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-percepnet-real-time-low-complexity-target-voice-separation-and-enhancement-2106.04129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-percepnet-real-time-low-complexity-target-voice-separation-and-enhancement-2106.04129"/></url>
<url><loc>https://scifaro.com/en/abs/speech-bert-embedding-for-improving-prosody-in-neural-tts-2106.04312</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-bert-embedding-for-improving-prosody-in-neural-tts-2106.04312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-bert-embedding-for-improving-prosody-in-neural-tts-2106.04312"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-discussion-on-dcase-2021-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-under-domain-shifted-conditions-2106.04492</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2021-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-under-domain-shifted-conditions-2106.04492"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2021-challenge-task-2-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-under-domain-shifted-conditions-2106.04492"/></url>
<url><loc>https://scifaro.com/en/abs/speechbrain-a-general-purpose-speech-toolkit-2106.04624</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechbrain-a-general-purpose-speech-toolkit-2106.04624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechbrain-a-general-purpose-speech-toolkit-2106.04624"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-training-with-pseudo-labeling-for-end-to-end-neural-diarization-2106.04764</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-training-with-pseudo-labeling-for-end-to-end-neural-diarization-2106.04764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-training-with-pseudo-labeling-for-end-to-end-neural-diarization-2106.04764"/></url>
<url><loc>https://scifaro.com/en/abs/deep-interaction-between-masking-and-mapping-targets-for-single-channel-speech-enhancement-2106.04878</loc><lastmod>2021-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-interaction-between-masking-and-mapping-targets-for-single-channel-speech-enhancement-2106.04878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-interaction-between-masking-and-mapping-targets-for-single-channel-speech-enhancement-2106.04878"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-transfer-learning-for-audio-tagging-and-sound-event-detection-2106.05408</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-transfer-learning-for-audio-tagging-and-sound-event-detection-2106.05408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-transfer-learning-for-audio-tagging-and-sound-event-detection-2106.05408"/></url>
<url><loc>https://scifaro.com/en/abs/relational-data-selection-for-data-augmentation-of-speaker-dependent-multi-band-melgan-vocoder-2106.05629</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relational-data-selection-for-data-augmentation-of-speaker-dependent-multi-band-melgan-vocoder-2106.05629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relational-data-selection-for-data-augmentation-of-speaker-dependent-multi-band-melgan-vocoder-2106.05629"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conversation-factorial-designs-for-diarization-error-analysis-2106.05792</loc><lastmod>2021-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conversation-factorial-designs-for-diarization-error-analysis-2106.05792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conversation-factorial-designs-for-diarization-error-analysis-2106.05792"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speech-recognition-in-sanskrit-a-new-speech-corpus-and-modelling-insights-2106.05852</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speech-recognition-in-sanskrit-a-new-speech-corpus-and-modelling-insights-2106.05852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speech-recognition-in-sanskrit-a-new-speech-corpus-and-modelling-insights-2106.05852"/></url>
<url><loc>https://scifaro.com/en/abs/improving-rnn-t-asr-performance-with-date-time-and-location-awareness-2106.06183</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-rnn-t-asr-performance-with-date-time-and-location-awareness-2106.06183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-rnn-t-asr-performance-with-date-time-and-location-awareness-2106.06183"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-synthetic-speech-detection-2106.06341</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-synthetic-speech-detection-2106.06341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-synthetic-speech-detection-2106.06341"/></url>
<url><loc>https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-self-supervised-auxiliary-tasks-2106.06858</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-self-supervised-auxiliary-tasks-2106.06858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-weakly-supervised-sound-event-detection-with-self-supervised-auxiliary-tasks-2106.06858"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-of-dynamic-reverberant-sound-scenes-with-directional-interferers-for-sound-event-localization-and-detection-2106.06999</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-of-dynamic-reverberant-sound-scenes-with-directional-interferers-for-sound-event-localization-and-detection-2106.06999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-of-dynamic-reverberant-sound-scenes-with-directional-interferers-for-sound-event-localization-and-detection-2106.06999"/></url>
<url><loc>https://scifaro.com/en/abs/wase-learning-when-to-attend-for-speaker-extraction-in-cocktail-party-environments-2106.07016</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wase-learning-when-to-attend-for-speaker-extraction-in-cocktail-party-environments-2106.07016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wase-learning-when-to-attend-for-speaker-extraction-in-cocktail-party-environments-2106.07016"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-learning-of-new-sound-classes-for-target-sound-extraction-2106.07144</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-learning-of-new-sound-classes-for-target-sound-extraction-2106.07144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-learning-of-new-sound-classes-for-target-sound-extraction-2106.07144"/></url>
<url><loc>https://scifaro.com/en/abs/selective-listening-by-synchronizing-speech-with-lips-2106.07150</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selective-listening-by-synchronizing-speech-with-lips-2106.07150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selective-listening-by-synchronizing-speech-with-lips-2106.07150"/></url>
<url><loc>https://scifaro.com/en/abs/speech-disorder-classification-using-extended-factorized-hierarchical-variational-auto-encoders-2106.07337</loc><lastmod>2021-06-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-disorder-classification-using-extended-factorized-hierarchical-variational-auto-encoders-2106.07337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-disorder-classification-using-extended-factorized-hierarchical-variational-auto-encoders-2106.07337"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-filter-network-speaker-aware-modeling-for-speech-separation-2106.07579</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-filter-network-speaker-aware-modeling-for-speech-separation-2106.07579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-filter-network-speaker-aware-modeling-for-speech-separation-2106.07579"/></url>
<url><loc>https://scifaro.com/en/abs/kaizen-continuously-improving-teacher-using-exponential-moving-average-for-semi-supervised-speech-recognition-2106.07759</loc><lastmod>2021-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kaizen-continuously-improving-teacher-using-exponential-moving-average-for-semi-supervised-speech-recognition-2106.07759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kaizen-continuously-improving-teacher-using-exponential-moving-average-for-semi-supervised-speech-recognition-2106.07759"/></url>
<url><loc>https://scifaro.com/en/abs/univnet-a-neural-vocoder-with-multi-resolution-spectrogram-discriminators-for-high-fidelity-waveform-generation-2106.07889</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/univnet-a-neural-vocoder-with-multi-resolution-spectrogram-discriminators-for-high-fidelity-waveform-generation-2106.07889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/univnet-a-neural-vocoder-with-multi-resolution-spectrogram-discriminators-for-high-fidelity-waveform-generation-2106.07889"/></url>
<url><loc>https://scifaro.com/en/abs/srib-submission-to-interspeech-2021-dicova-challenge-2106.07972</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/srib-submission-to-interspeech-2021-dicova-challenge-2106.07972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/srib-submission-to-interspeech-2021-dicova-challenge-2106.07972"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-opus-compression-for-far-field-automatic-speech-recognition-with-a-fixed-bitrate-budget-2106.07994</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-opus-compression-for-far-field-automatic-speech-recognition-with-a-fixed-bitrate-budget-2106.07994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-opus-compression-for-far-field-automatic-speech-recognition-with-a-fixed-bitrate-budget-2106.07994"/></url>
<url><loc>https://scifaro.com/en/abs/dialectal-speech-recognition-and-translation-of-swiss-german-speech-to-standard-german-text-microsoft-s-submission-to-swisstext-2021-2106.08126</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialectal-speech-recognition-and-translation-of-swiss-german-speech-to-standard-german-text-microsoft-s-submission-to-swisstext-2021-2106.08126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialectal-speech-recognition-and-translation-of-swiss-german-speech-to-standard-german-text-microsoft-s-submission-to-swisstext-2021-2106.08126"/></url>
<url><loc>https://scifaro.com/en/abs/e2e-based-multi-task-learning-approach-to-joint-speech-and-accent-recognition-2106.08211</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e2e-based-multi-task-learning-approach-to-joint-speech-and-accent-recognition-2106.08211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e2e-based-multi-task-learning-approach-to-joint-speech-and-accent-recognition-2106.08211"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-into-pre-training-strategies-for-spoken-language-understanding-on-dysarthric-speech-2106.08313</loc><lastmod>2021-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-into-pre-training-strategies-for-spoken-language-understanding-on-dysarthric-speech-2106.08313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-into-pre-training-strategies-for-spoken-language-understanding-on-dysarthric-speech-2106.08313"/></url>
<url><loc>https://scifaro.com/en/abs/adept-a-dataset-for-evaluating-prosody-transfer-2106.08321</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adept-a-dataset-for-evaluating-prosody-transfer-2106.08321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adept-a-dataset-for-evaluating-prosody-transfer-2106.08321"/></url>
<url><loc>https://scifaro.com/en/abs/ctrl-p-temporal-control-of-prosodic-variation-for-speech-synthesis-2106.08352</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctrl-p-temporal-control-of-prosodic-variation-for-speech-synthesis-2106.08352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctrl-p-temporal-control-of-prosodic-variation-for-speech-synthesis-2106.08352"/></url>
<url><loc>https://scifaro.com/en/abs/global-rhythm-style-transfer-without-text-transcriptions-2106.08519</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-rhythm-style-transfer-without-text-transcriptions-2106.08519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-rhythm-style-transfer-without-text-transcriptions-2106.08519"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-consonant-errors-in-disordered-speech-based-on-consonant-vowel-segment-embedding-2106.08536</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-consonant-errors-in-disordered-speech-based-on-consonant-vowel-segment-embedding-2106.08536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-consonant-errors-in-disordered-speech-based-on-consonant-vowel-segment-embedding-2106.08536"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-asr-combining-non-autoregressive-conformer-ctc-and-conditional-speaker-chain-2106.08595</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-asr-combining-non-autoregressive-conformer-ctc-and-conditional-speaker-chain-2106.08595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-asr-combining-non-autoregressive-conformer-ctc-and-conditional-speaker-chain-2106.08595"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-expressiveness-of-neural-vocoding-with-non-affine-normalizing-flows-2106.08649</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-expressiveness-of-neural-vocoding-with-non-affine-normalizing-flows-2106.08649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-expressiveness-of-neural-vocoding-with-non-affine-normalizing-flows-2106.08649"/></url>
<url><loc>https://scifaro.com/en/abs/dccrn-channel-wise-subband-dccrn-with-snr-estimation-for-speech-enhancement-2106.08672</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dccrn-channel-wise-subband-dccrn-with-snr-estimation-for-speech-enhancement-2106.08672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dccrn-channel-wise-subband-dccrn-with-snr-estimation-for-speech-enhancement-2106.08672"/></url>
<url><loc>https://scifaro.com/en/abs/enriching-source-style-transfer-in-recognition-synthesis-based-non-parallel-voice-conversion-2106.08741</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enriching-source-style-transfer-in-recognition-synthesis-based-non-parallel-voice-conversion-2106.08741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enriching-source-style-transfer-in-recognition-synthesis-based-non-parallel-voice-conversion-2106.08741"/></url>
<url><loc>https://scifaro.com/en/abs/momentum-pseudo-labeling-for-semi-supervised-speech-recognition-2106.08922</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/momentum-pseudo-labeling-for-semi-supervised-speech-recognition-2106.08922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/momentum-pseudo-labeling-for-semi-supervised-speech-recognition-2106.08922"/></url>
<url><loc>https://scifaro.com/en/abs/a-flow-based-neural-network-for-time-domain-speech-enhancement-2106.09008</loc><lastmod>2021-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-flow-based-neural-network-for-time-domain-speech-enhancement-2106.09008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-flow-based-neural-network-for-time-domain-speech-enhancement-2106.09008"/></url>
<url><loc>https://scifaro.com/en/abs/a-hands-on-comparison-of-dnns-for-dialog-separation-using-transfer-learning-from-music-source-separation-2106.09093</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hands-on-comparison-of-dnns-for-dialog-separation-using-transfer-learning-from-music-source-separation-2106.09093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hands-on-comparison-of-dnns-for-dialog-separation-using-transfer-learning-from-music-source-separation-2106.09093"/></url>
<url><loc>https://scifaro.com/en/abs/layer-pruning-on-demand-with-intermediate-ctc-2106.09216</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/layer-pruning-on-demand-with-intermediate-ctc-2106.09216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/layer-pruning-on-demand-with-intermediate-ctc-2106.09216"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-laws-for-acoustic-models-2106.09488</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-laws-for-acoustic-models-2106.09488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-laws-for-acoustic-models-2106.09488"/></url>
<url><loc>https://scifaro.com/en/abs/asr-adaptation-for-e-commerce-chatbots-using-cross-utterance-context-and-multi-task-language-modeling-2106.09532</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-adaptation-for-e-commerce-chatbots-using-cross-utterance-context-and-multi-task-language-modeling-2106.09532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-adaptation-for-e-commerce-chatbots-using-cross-utterance-context-and-multi-task-language-modeling-2106.09532"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-analysis-of-the-emotional-content-of-speech-in-daylong-child-centered-recordings-from-a-neonatal-intensive-care-unit-2106.09539</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-analysis-of-the-emotional-content-of-speech-in-daylong-child-centered-recordings-from-a-neonatal-intensive-care-unit-2106.09539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-analysis-of-the-emotional-content-of-speech-in-daylong-child-centered-recordings-from-a-neonatal-intensive-care-unit-2106.09539"/></url>
<url><loc>https://scifaro.com/en/abs/stan-a-stuttering-therapy-analysis-helper-2106.09545</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stan-a-stuttering-therapy-analysis-helper-2106.09545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stan-a-stuttering-therapy-analysis-helper-2106.09545"/></url>
<url><loc>https://scifaro.com/en/abs/localization-based-on-enhanced-low-frequency-interaural-level-difference-2106.09574</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-based-on-enhanced-low-frequency-interaural-level-difference-2106.09574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-based-on-enhanced-low-frequency-interaural-level-difference-2106.09574"/></url>
<url><loc>https://scifaro.com/en/abs/extracting-different-levels-of-speech-information-from-eeg-using-an-lstm-based-model-2106.09622</loc><lastmod>2021-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extracting-different-levels-of-speech-information-from-eeg-using-an-lstm-based-model-2106.09622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extracting-different-levels-of-speech-information-from-eeg-using-an-lstm-based-model-2106.09622"/></url>
<url><loc>https://scifaro.com/en/abs/wavegrad-2-iterative-refinement-for-text-to-speech-synthesis-2106.09660</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavegrad-2-iterative-refinement-for-text-to-speech-synthesis-2106.09660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavegrad-2-iterative-refinement-for-text-to-speech-synthesis-2106.09660"/></url>
<url><loc>https://scifaro.com/en/abs/multi-mode-transformer-transducer-with-stochastic-future-context-2106.09760</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-mode-transformer-transducer-with-stochastic-future-context-2106.09760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-mode-transformer-transducer-with-stochastic-future-context-2106.09760"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-single-step-non-autoregressive-transformer-for-automatic-speech-recognition-2106.09885</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-single-step-non-autoregressive-transformer-for-automatic-speech-recognition-2106.09885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-single-step-non-autoregressive-transformer-for-automatic-speech-recognition-2106.09885"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-german-asr-with-untranscribed-data-spoken-by-non-native-children-interspeech-2021-shared-task-spapl-system-2106.09963</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-german-asr-with-untranscribed-data-spoken-by-non-native-children-interspeech-2021-shared-task-spapl-system-2106.09963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-german-asr-with-untranscribed-data-spoken-by-non-native-children-interspeech-2021-shared-task-spapl-system-2106.09963"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-dysarthric-speech-detection-via-domain-adversarial-training-and-mutual-information-minimization-2106.10127</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-dysarthric-speech-detection-via-domain-adversarial-training-and-mutual-information-minimization-2106.10127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-for-dysarthric-speech-detection-via-domain-adversarial-training-and-mutual-information-minimization-2106.10127"/></url>
<url><loc>https://scifaro.com/en/abs/vqmivc-vector-quantization-and-mutual-information-based-unsupervised-speech-representation-disentanglement-for-one-shot-voice-conversion-2106.10132</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vqmivc-vector-quantization-and-mutual-information-based-unsupervised-speech-representation-disentanglement-for-one-shot-voice-conversion-2106.10132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vqmivc-vector-quantization-and-mutual-information-based-unsupervised-speech-representation-disentanglement-for-one-shot-voice-conversion-2106.10132"/></url>
<url><loc>https://scifaro.com/en/abs/golos-russian-dataset-for-speech-research-2106.10161</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/golos-russian-dataset-for-speech-research-2106.10161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/golos-russian-dataset-for-speech-research-2106.10161"/></url>
<url><loc>https://scifaro.com/en/abs/a-learned-conditional-prior-for-the-vae-acoustic-space-of-a-tts-system-2106.10229</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-learned-conditional-prior-for-the-vae-acoustic-space-of-a-tts-system-2106.10229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-learned-conditional-prior-for-the-vae-acoustic-space-of-a-tts-system-2106.10229"/></url>
<url><loc>https://scifaro.com/en/abs/on-device-personalization-of-automatic-speech-recognition-models-for-disordered-speech-2106.10259</loc><lastmod>2021-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-device-personalization-of-automatic-speech-recognition-models-for-disordered-speech-2106.10259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-device-personalization-of-automatic-speech-recognition-models-for-disordered-speech-2106.10259"/></url>
<url><loc>https://scifaro.com/en/abs/gpla-12-an-acoustic-signal-dataset-of-gas-pipeline-leakage-2106.10277</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gpla-12-an-acoustic-signal-dataset-of-gas-pipeline-leakage-2106.10277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gpla-12-an-acoustic-signal-dataset-of-gas-pipeline-leakage-2106.10277"/></url>
<url><loc>https://scifaro.com/en/abs/encoder-decoder-based-attractors-for-end-to-end-neural-diarization-2106.10654</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/encoder-decoder-based-attractors-for-end-to-end-neural-diarization-2106.10654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/encoder-decoder-based-attractors-for-end-to-end-neural-diarization-2106.10654"/></url>
<url><loc>https://scifaro.com/en/abs/meshrir-a-dataset-of-room-impulse-responses-on-meshed-grid-points-for-evaluating-sound-field-analysis-and-synthesis-methods-2106.10801</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meshrir-a-dataset-of-room-impulse-responses-on-meshed-grid-points-for-evaluating-sound-field-analysis-and-synthesis-methods-2106.10801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meshrir-a-dataset-of-room-impulse-responses-on-meshed-grid-points-for-evaluating-sound-field-analysis-and-synthesis-methods-2106.10801"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-of-accdoa-and-einv2-based-systems-with-d3nets-and-impulse-response-simulation-for-sound-event-localization-and-detection-2106.10806</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-of-accdoa-and-einv2-based-systems-with-d3nets-and-impulse-response-simulation-for-sound-event-localization-and-detection-2106.10806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-of-accdoa-and-einv2-based-systems-with-d3nets-and-impulse-response-simulation-for-sound-event-localization-and-detection-2106.10806"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-context-aware-conversational-speech-synthesis-2106.10828</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-context-aware-conversational-speech-synthesis-2106.10828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-context-aware-conversational-speech-synthesis-2106.10828"/></url>
<url><loc>https://scifaro.com/en/abs/glow-wavegan-learning-speech-representations-from-gan-based-variational-auto-encoder-for-high-fidelity-flow-based-speech-synthesis-2106.10831</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glow-wavegan-learning-speech-representations-from-gan-based-variational-auto-encoder-for-high-fidelity-flow-based-speech-synthesis-2106.10831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glow-wavegan-learning-speech-representations-from-gan-based-variational-auto-encoder-for-high-fidelity-flow-based-speech-synthesis-2106.10831"/></url>
<url><loc>https://scifaro.com/en/abs/non-native-english-lexicon-creation-for-bilingual-speech-synthesis-2106.10870</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-native-english-lexicon-creation-for-bilingual-speech-synthesis-2106.10870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-native-english-lexicon-creation-for-bilingual-speech-synthesis-2106.10870"/></url>
<url><loc>https://scifaro.com/en/abs/speech-prosody-and-remote-experiments-a-technical-report-2106.10915</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-prosody-and-remote-experiments-a-technical-report-2106.10915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-prosody-and-remote-experiments-a-technical-report-2106.10915"/></url>
<url><loc>https://scifaro.com/en/abs/towards-sound-based-testing-of-covid-19-summary-of-the-first-diagnostics-of-covid-19-using-acoustics-dicova-challenge-2106.10997</loc><lastmod>2021-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-sound-based-testing-of-covid-19-summary-of-the-first-diagnostics-of-covid-19-using-acoustics-dicova-challenge-2106.10997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-sound-based-testing-of-covid-19-summary-of-the-first-diagnostics-of-covid-19-using-acoustics-dicova-challenge-2106.10997"/></url>
<url><loc>https://scifaro.com/en/abs/unitts-residual-learning-of-unified-embedding-space-for-speech-style-control-2106.11171</loc><lastmod>2022-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unitts-residual-learning-of-unified-embedding-space-for-speech-style-control-2106.11171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unitts-residual-learning-of-unified-embedding-space-for-speech-style-control-2106.11171"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-tuning-of-a-voice-assistant-system-for-dysfluent-speech-2106.11759</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-tuning-of-a-voice-assistant-system-for-dysfluent-speech-2106.11759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-tuning-of-a-voice-assistant-system-for-dysfluent-speech-2106.11759"/></url>
<url><loc>https://scifaro.com/en/abs/improving-ultrasound-tongue-image-reconstruction-from-lip-images-using-self-supervised-learning-and-attention-mechanism-2106.11769</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-ultrasound-tongue-image-reconstruction-from-lip-images-using-self-supervised-learning-and-attention-mechanism-2106.11769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-ultrasound-tongue-image-reconstruction-from-lip-images-using-self-supervised-learning-and-attention-mechanism-2106.11769"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-based-low-latency-speech-separation-with-asymmetric-analysis-synthesis-window-pair-2106.11794</loc><lastmod>2021-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-based-low-latency-speech-separation-with-asymmetric-analysis-synthesis-window-pair-2106.11794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-based-low-latency-speech-separation-with-asymmetric-analysis-synthesis-window-pair-2106.11794"/></url>
<url><loc>https://scifaro.com/en/abs/srib-leap-submission-to-far-field-multi-channel-speech-enhancement-challenge-for-video-conferencing-2106.12763</loc><lastmod>2021-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/srib-leap-submission-to-far-field-multi-channel-speech-enhancement-challenge-for-video-conferencing-2106.12763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/srib-leap-submission-to-far-field-multi-channel-speech-enhancement-challenge-for-video-conferencing-2106.12763"/></url>
<url><loc>https://scifaro.com/en/abs/online-self-attentive-gated-rnns-for-real-time-speaker-separation-2106.13493</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-self-attentive-gated-rnns-for-real-time-speaker-separation-2106.13493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-self-attentive-gated-rnns-for-real-time-speaker-separation-2106.13493"/></url>
<url><loc>https://scifaro.com/en/abs/an-audio-envelope-generator-derived-from-industrial-process-control-2106.13966</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-audio-envelope-generator-derived-from-industrial-process-control-2106.13966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-audio-envelope-generator-derived-from-industrial-process-control-2106.13966"/></url>
<url><loc>https://scifaro.com/en/abs/mobile-microphone-array-speech-detection-and-localization-in-diverse-everyday-environments-2106.14787</loc><lastmod>2021-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mobile-microphone-array-speech-detection-and-localization-in-diverse-everyday-environments-2106.14787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mobile-microphone-array-speech-detection-and-localization-in-diverse-everyday-environments-2106.14787"/></url>
<url><loc>https://scifaro.com/en/abs/fastpitchformant-source-filter-based-decomposed-modeling-for-speech-synthesis-2106.15123</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastpitchformant-source-filter-based-decomposed-modeling-for-speech-synthesis-2106.15123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastpitchformant-source-filter-based-decomposed-modeling-for-speech-synthesis-2106.15123"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-context-aware-transformers-for-non-autoregressive-text-to-speech-2106.15144</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-context-aware-transformers-for-non-autoregressive-text-to-speech-2106.15144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-context-aware-transformers-for-non-autoregressive-text-to-speech-2106.15144"/></url>
<url><loc>https://scifaro.com/en/abs/ganspeech-adversarial-training-for-high-fidelity-multi-speaker-speech-synthesis-2106.15153</loc><lastmod>2021-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ganspeech-adversarial-training-for-high-fidelity-multi-speaker-speech-synthesis-2106.15153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ganspeech-adversarial-training-for-high-fidelity-multi-speaker-speech-synthesis-2106.15153"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2021-task-3-spectrotemporally-aligned-features-for-polyphonic-sound-event-localization-and-detection-2106.15190</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2021-task-3-spectrotemporally-aligned-features-for-polyphonic-sound-event-localization-and-detection-2106.15190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2021-task-3-spectrotemporally-aligned-features-for-polyphonic-sound-event-localization-and-detection-2106.15190"/></url>
<url><loc>https://scifaro.com/en/abs/n-singer-a-non-autoregressive-korean-singing-voice-synthesis-system-for-pronunciation-enhancement-2106.15205</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/n-singer-a-non-autoregressive-korean-singing-voice-synthesis-system-for-pronunciation-enhancement-2106.15205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/n-singer-a-non-autoregressive-korean-singing-voice-synthesis-system-for-pronunciation-enhancement-2106.15205"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-neural-speech-synthesis-2106.15561</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-neural-speech-synthesis-2106.15561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-neural-speech-synthesis-2106.15561"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-spectrogram-modelling-for-neural-text-to-speech-2106.15649</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-spectrogram-modelling-for-neural-text-to-speech-2106.15649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-spectrogram-modelling-for-neural-text-to-speech-2106.15649"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-generalized-monaural-and-binaural-auditory-model-for-psychoacoustics-and-speech-intelligibility-2106.15659</loc><lastmod>2026-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-generalized-monaural-and-binaural-auditory-model-for-psychoacoustics-and-speech-intelligibility-2106.15659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-generalized-monaural-and-binaural-auditory-model-for-psychoacoustics-and-speech-intelligibility-2106.15659"/></url>
<url><loc>https://scifaro.com/en/abs/df-conformer-integrated-architecture-of-conv-tasnet-and-conformer-using-linear-complexity-self-attention-for-speech-enhancement-2106.15813</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/df-conformer-integrated-architecture-of-conv-tasnet-and-conformer-using-linear-complexity-self-attention-for-speech-enhancement-2106.15813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/df-conformer-integrated-architecture-of-conv-tasnet-and-conformer-using-linear-complexity-self-attention-for-speech-enhancement-2106.15813"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-resolution-of-late-reverberation-in-virtual-acoustic-environments-2106.15888</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-resolution-of-late-reverberation-in-virtual-acoustic-environments-2106.15888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-resolution-of-late-reverberation-in-virtual-acoustic-environments-2106.15888"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-acoustic-scene-complexity-and-visual-scene-representation-on-auditory-perception-in-virtual-audio-visual-environments-2106.15909</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-acoustic-scene-complexity-and-visual-scene-representation-on-auditory-perception-in-virtual-audio-visual-environments-2106.15909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-acoustic-scene-complexity-and-visual-scene-representation-on-auditory-perception-in-virtual-audio-visual-environments-2106.15909"/></url>
<url><loc>https://scifaro.com/en/abs/an-integrated-framework-for-two-pass-personalized-voice-trigger-2106.15950</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-integrated-framework-for-two-pass-personalized-voice-trigger-2106.15950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-integrated-framework-for-two-pass-personalized-voice-trigger-2106.15950"/></url>
<url><loc>https://scifaro.com/en/abs/fast-processing-explains-the-effect-of-sound-reflection-on-binaural-unmasking-2106.16024</loc><lastmod>2021-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-processing-explains-the-effect-of-sound-reflection-on-binaural-unmasking-2106.16024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-processing-explains-the-effect-of-sound-reflection-on-binaural-unmasking-2106.16024"/></url>
<url><loc>https://scifaro.com/en/abs/computationally-efficient-spatial-rendering-of-late-reverberation-in-virtual-acoustic-environments-2107.00004</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computationally-efficient-spatial-rendering-of-late-reverberation-in-virtual-acoustic-environments-2107.00004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computationally-efficient-spatial-rendering-of-late-reverberation-in-virtual-acoustic-environments-2107.00004"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-level-confidence-classifier-for-asr-utterance-accuracy-and-application-to-acoustic-models-2107.00099</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-level-confidence-classifier-for-asr-utterance-accuracy-and-application-to-acoustic-models-2107.00099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-level-confidence-classifier-for-asr-utterance-accuracy-and-application-to-acoustic-models-2107.00099"/></url>
<url><loc>https://scifaro.com/en/abs/using-self-supervised-feature-extractors-with-attention-for-automatic-covid-19-detection-from-speech-2107.00112</loc><lastmod>2021-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-self-supervised-feature-extractors-with-attention-for-automatic-covid-19-detection-from-speech-2107.00112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-self-supervised-feature-extractors-with-attention-for-automatic-covid-19-detection-from-speech-2107.00112"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-of-tone-detection-thresholds-in-interaurally-delayed-noise-based-on-interaural-phase-difference-fluctuations-2107.00320</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-of-tone-detection-thresholds-in-interaurally-delayed-noise-based-on-interaural-phase-difference-fluctuations-2107.00320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-of-tone-detection-thresholds-in-interaurally-delayed-noise-based-on-interaural-phase-difference-fluctuations-2107.00320"/></url>
<url><loc>https://scifaro.com/en/abs/pretext-tasks-selection-for-multitask-self-supervised-speech-representation-learning-2107.00594</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretext-tasks-selection-for-multitask-self-supervised-speech-representation-learning-2107.00594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretext-tasks-selection-for-multitask-self-supervised-speech-representation-learning-2107.00594"/></url>
<url><loc>https://scifaro.com/en/abs/stableemit-selection-probability-discount-for-reducing-emission-latency-of-streaming-monotonic-attention-asr-2107.00635</loc><lastmod>2021-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stableemit-selection-probability-discount-for-reducing-emission-latency-of-streaming-monotonic-attention-asr-2107.00635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stableemit-selection-probability-discount-for-reducing-emission-latency-of-streaming-monotonic-attention-asr-2107.00635"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-st-iwslt-2021-offline-speech-translation-system-2107.00636</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-st-iwslt-2021-offline-speech-translation-system-2107.00636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-st-iwslt-2021-offline-speech-translation-system-2107.00636"/></url>
<url><loc>https://scifaro.com/en/abs/combining-frame-synchronous-and-label-synchronous-systems-for-speech-recognition-2107.00764</loc><lastmod>2021-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-frame-synchronous-and-label-synchronous-systems-for-speech-recognition-2107.00764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-frame-synchronous-and-label-synchronous-systems-for-speech-recognition-2107.00764"/></url>
<url><loc>https://scifaro.com/en/abs/multi-user-voicefilter-lite-via-attentive-speaker-embedding-2107.01201</loc><lastmod>2021-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-user-voicefilter-lite-via-attentive-speaker-embedding-2107.01201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-user-voicefilter-lite-via-attentive-speaker-embedding-2107.01201"/></url>
<url><loc>https://scifaro.com/en/abs/dual-causal-non-causal-self-attention-for-streaming-end-to-end-speech-recognition-2107.01269</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-causal-non-causal-self-attention-for-streaming-end-to-end-speech-recognition-2107.01269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-causal-non-causal-self-attention-for-streaming-end-to-end-speech-recognition-2107.01269"/></url>
<url><loc>https://scifaro.com/en/abs/relaxed-attention-a-simple-method-to-boost-performance-of-end-to-end-automatic-speech-recognition-2107.01275</loc><lastmod>2021-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relaxed-attention-a-simple-method-to-boost-performance-of-end-to-end-automatic-speech-recognition-2107.01275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relaxed-attention-a-simple-method-to-boost-performance-of-end-to-end-automatic-speech-recognition-2107.01275"/></url>
<url><loc>https://scifaro.com/en/abs/tenet-a-time-reversal-enhancement-network-for-noise-robust-asr-2107.01531</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tenet-a-time-reversal-enhancement-network-for-noise-robust-asr-2107.01531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tenet-a-time-reversal-enhancement-network-for-noise-robust-asr-2107.01531"/></url>
<url><loc>https://scifaro.com/en/abs/towards-neural-diarization-for-unlimited-numbers-of-speakers-using-global-and-local-attractors-2107.01545</loc><lastmod>2021-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-neural-diarization-for-unlimited-numbers-of-speakers-using-global-and-local-attractors-2107.01545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-neural-diarization-for-unlimited-numbers-of-speakers-using-global-and-local-attractors-2107.01545"/></url>
<url><loc>https://scifaro.com/en/abs/editspeech-a-text-based-speech-editing-system-using-partial-inference-and-bidirectional-fusion-2107.01554</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/editspeech-a-text-based-speech-editing-system-using-partial-inference-and-bidirectional-fusion-2107.01554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/editspeech-a-text-based-speech-editing-system-using-partial-inference-and-bidirectional-fusion-2107.01554"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-eight-human-auditory-models-of-monaural-processing-2107.01753</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-eight-human-auditory-models-of-monaural-processing-2107.01753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-eight-human-auditory-models-of-monaural-processing-2107.01753"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-practical-aspects-of-single-channel-speech-separation-for-asr-2107.01922</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-practical-aspects-of-single-channel-speech-separation-for-asr-2107.01922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-practical-aspects-of-single-channel-speech-separation-for-asr-2107.01922"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-from-text-and-ultrasound-tongue-image-based-articulatory-input-2107.02003</loc><lastmod>2021-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-from-text-and-ultrasound-tongue-image-based-articulatory-input-2107.02003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-from-text-and-ultrasound-tongue-image-based-articulatory-input-2107.02003"/></url>
<url><loc>https://scifaro.com/en/abs/separation-guided-speaker-diarization-in-realistic-mismatched-conditions-2107.02357</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separation-guided-speaker-diarization-in-realistic-mismatched-conditions-2107.02357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separation-guided-speaker-diarization-in-realistic-mismatched-conditions-2107.02357"/></url>
<url><loc>https://scifaro.com/en/abs/location-location-enhancing-the-evaluation-of-text-to-speech-synthesis-using-the-rapid-prosody-transcription-paradigm-2107.02527</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/location-location-enhancing-the-evaluation-of-text-to-speech-synthesis-using-the-rapid-prosody-transcription-paradigm-2107.02527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/location-location-enhancing-the-evaluation-of-text-to-speech-synthesis-using-the-rapid-prosody-transcription-paradigm-2107.02527"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-2107.02670</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-2107.02670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-2107.02670"/></url>
<url><loc>https://scifaro.com/en/abs/lexical-access-model-for-italian-modeling-human-speech-processing-identification-of-words-in-running-speech-toward-lexical-access-based-on-the-detection-of-landmarks-and-other-acoustic-cues-to-features-2107.02720</loc><lastmod>2021-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lexical-access-model-for-italian-modeling-human-speech-processing-identification-of-words-in-running-speech-toward-lexical-access-based-on-the-detection-of-landmarks-and-other-acoustic-cues-to-features-2107.02720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lexical-access-model-for-italian-modeling-human-speech-processing-identification-of-words-in-running-speech-toward-lexical-access-based-on-the-detection-of-landmarks-and-other-acoustic-cues-to-features-2107.02720"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-modular-and-joint-approaches-for-speaker-attributed-asr-on-monaural-long-form-audio-2107.02852</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-modular-and-joint-approaches-for-speaker-attributed-asr-on-monaural-long-form-audio-2107.02852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-modular-and-joint-approaches-for-speaker-attributed-asr-on-monaural-long-form-audio-2107.02852"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-ctc-crf-based-end-to-end-speech-recognition-with-wordpieces-and-conformers-2107.03007</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-ctc-crf-based-end-to-end-speech-recognition-with-wordpieces-and-conformers-2107.03007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-ctc-crf-based-end-to-end-speech-recognition-with-wordpieces-and-conformers-2107.03007"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-recognition-accuracy-of-local-poi-using-geographical-models-2107.03165</loc><lastmod>2021-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-recognition-accuracy-of-local-poi-using-geographical-models-2107.03165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-recognition-accuracy-of-local-poi-using-geographical-models-2107.03165"/></url>
<url><loc>https://scifaro.com/en/abs/heavily-augmented-sound-event-detection-utilizing-weak-predictions-2107.03649</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heavily-augmented-sound-event-detection-utilizing-weak-predictions-2107.03649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heavily-augmented-sound-event-detection-utilizing-weak-predictions-2107.03649"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-voice-conversion-a-joint-framework-for-speaker-identity-and-emotional-style-transfer-2107.03748</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-voice-conversion-a-joint-framework-for-speaker-identity-and-emotional-style-transfer-2107.03748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-voice-conversion-a-joint-framework-for-speaker-identity-and-emotional-style-transfer-2107.03748"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-supervised-models-and-learned-speech-representations-for-classifying-intelligibility-of-disordered-speech-on-selected-phrases-2107.03985</loc><lastmod>2021-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-supervised-models-and-learned-speech-representations-for-classifying-intelligibility-of-disordered-speech-on-selected-phrases-2107.03985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-supervised-models-and-learned-speech-representations-for-classifying-intelligibility-of-disordered-speech-on-selected-phrases-2107.03985"/></url>
<url><loc>https://scifaro.com/en/abs/on-lattice-free-boosted-mmi-training-of-hmm-and-ctc-based-full-context-asr-models-2107.04154</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-lattice-free-boosted-mmi-training-of-hmm-and-ctc-based-full-context-asr-models-2107.04154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-lattice-free-boosted-mmi-training-of-hmm-and-ctc-based-full-context-asr-models-2107.04154"/></url>
<url><loc>https://scifaro.com/en/abs/dropout-regularization-for-self-supervised-learning-of-transformer-encoder-speech-representation-2107.04227</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dropout-regularization-for-self-supervised-learning-of-transformer-encoder-speech-representation-2107.04227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dropout-regularization-for-self-supervised-learning-of-transformer-encoder-speech-representation-2107.04227"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-multi-target-in-multi-stage-speech-enhancement-model-for-better-generalization-2107.04232</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-multi-target-in-multi-stage-speech-enhancement-model-for-better-generalization-2107.04232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-multi-target-in-multi-stage-speech-enhancement-model-for-better-generalization-2107.04232"/></url>
<url><loc>https://scifaro.com/en/abs/blind-source-separation-in-polyphonic-music-recordings-using-deep-neural-networks-trained-via-policy-gradients-2107.04235</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-source-separation-in-polyphonic-music-recordings-using-deep-neural-networks-trained-via-policy-gradients-2107.04235"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-source-separation-in-polyphonic-music-recordings-using-deep-neural-networks-trained-via-policy-gradients-2107.04235"/></url>
<url><loc>https://scifaro.com/en/abs/loss-prediction-end-to-end-active-learning-approach-for-speech-recognition-2107.04289</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loss-prediction-end-to-end-active-learning-approach-for-speech-recognition-2107.04289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loss-prediction-end-to-end-active-learning-approach-for-speech-recognition-2107.04289"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-to-classify-and-detect-adversarial-attacks-against-speaker-and-speech-recognition-systems-2107.04448</loc><lastmod>2021-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-to-classify-and-detect-adversarial-attacks-against-speaker-and-speech-recognition-systems-2107.04448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-to-classify-and-detect-adversarial-attacks-against-speaker-and-speech-recognition-systems-2107.04448"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-bayesian-framework-for-adaptive-speech-duration-modification-2107.04973</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-bayesian-framework-for-adaptive-speech-duration-modification-2107.04973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-bayesian-framework-for-adaptive-speech-duration-modification-2107.04973"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-based-deep-learning-denoiser-as-a-defense-against-adversarial-attacks-on-asr-systems-2107.05222</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-based-deep-learning-denoiser-as-a-defense-against-adversarial-attacks-on-asr-systems-2107.05222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-based-deep-learning-denoiser-as-a-defense-against-adversarial-attacks-on-asr-systems-2107.05222"/></url>
<url><loc>https://scifaro.com/en/abs/unispeech-at-scale-an-empirical-study-of-pre-training-method-on-large-scale-speech-recognition-dataset-2107.05233</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unispeech-at-scale-an-empirical-study-of-pre-training-method-on-large-scale-speech-recognition-dataset-2107.05233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unispeech-at-scale-an-empirical-study-of-pre-training-method-on-large-scale-speech-recognition-dataset-2107.05233"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-a-tutorial-2107.05463</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-a-tutorial-2107.05463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-a-tutorial-2107.05463"/></url>
<url><loc>https://scifaro.com/en/abs/extending-text-to-speech-synthesis-with-articulatory-movement-prediction-using-ultrasound-tongue-imaging-2107.05550</loc><lastmod>2021-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-text-to-speech-synthesis-with-articulatory-movement-prediction-using-ultrasound-tongue-imaging-2107.05550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-text-to-speech-synthesis-with-articulatory-movement-prediction-using-ultrasound-tongue-imaging-2107.05550"/></url>
<url><loc>https://scifaro.com/en/abs/auc-optimization-for-robust-small-footprint-keyword-spotting-with-limited-training-data-2107.05859</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auc-optimization-for-robust-small-footprint-keyword-spotting-with-limited-training-data-2107.05859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auc-optimization-for-robust-small-footprint-keyword-spotting-with-limited-training-data-2107.05859"/></url>
<url><loc>https://scifaro.com/en/abs/a-configurable-multilingual-model-is-all-you-need-to-recognize-all-languages-2107.05876</loc><lastmod>2021-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-configurable-multilingual-model-is-all-you-need-to-recognize-all-languages-2107.05876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-configurable-multilingual-model-is-all-you-need-to-recognize-all-languages-2107.05876"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-audio-source-separation-2107.06467</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-audio-source-separation-2107.06467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-audio-source-separation-2107.06467"/></url>
<url><loc>https://scifaro.com/en/abs/is-someone-speaking-exploring-long-term-temporal-features-for-audio-visual-active-speaker-detection-2107.06592</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-someone-speaking-exploring-long-term-temporal-features-for-audio-visual-active-speaker-detection-2107.06592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-someone-speaking-exploring-long-term-temporal-features-for-audio-visual-active-speaker-detection-2107.06592"/></url>
<url><loc>https://scifaro.com/en/abs/many-to-many-voice-conversion-based-feature-disentanglement-using-variational-autoencoder-2107.06642</loc><lastmod>2021-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-based-feature-disentanglement-using-variational-autoencoder-2107.06642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/many-to-many-voice-conversion-based-feature-disentanglement-using-variational-autoencoder-2107.06642"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-online-convolutional-beamforming-2107.06775</loc><lastmod>2021-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-online-convolutional-beamforming-2107.06775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-online-convolutional-beamforming-2107.06775"/></url>
<url><loc>https://scifaro.com/en/abs/filtered-noise-shaping-for-time-domain-room-impulse-response-estimation-from-reverberant-speech-2107.07503</loc><lastmod>2021-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filtered-noise-shaping-for-time-domain-room-impulse-response-estimation-from-reverberant-speech-2107.07503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filtered-noise-shaping-for-time-domain-room-impulse-response-estimation-from-reverberant-speech-2107.07503"/></url>
<url><loc>https://scifaro.com/en/abs/vad-free-streaming-hybrid-ctc-attention-asr-for-unsegmented-recording-2107.07509</loc><lastmod>2021-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vad-free-streaming-hybrid-ctc-attention-asr-for-unsegmented-recording-2107.07509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vad-free-streaming-hybrid-ctc-attention-asr-for-unsegmented-recording-2107.07509"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-learning-with-cross-attention-for-keyword-spotting-2107.07634</loc><lastmod>2021-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-learning-with-cross-attention-for-keyword-spotting-2107.07634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-learning-with-cross-attention-for-keyword-spotting-2107.07634"/></url>
<url><loc>https://scifaro.com/en/abs/residual-attention-based-network-for-automatic-classification-of-phonation-modes-2107.08425</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-attention-based-network-for-automatic-classification-of-phonation-modes-2107.08425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-attention-based-network-for-automatic-classification-of-phonation-modes-2107.08425"/></url>
<url><loc>https://scifaro.com/en/abs/channel-wise-gated-res2net-towards-robust-detection-of-synthetic-speech-attacks-2107.08803</loc><lastmod>2021-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-wise-gated-res2net-towards-robust-detection-of-synthetic-speech-attacks-2107.08803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-wise-gated-res2net-towards-robust-detection-of-synthetic-speech-attacks-2107.08803"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-veracity-of-local-model-agnostic-explanations-in-audio-classification-targeted-investigations-with-adversarial-examples-2107.09045</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-veracity-of-local-model-agnostic-explanations-in-audio-classification-targeted-investigations-with-adversarial-examples-2107.09045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-veracity-of-local-model-agnostic-explanations-in-audio-classification-targeted-investigations-with-adversarial-examples-2107.09045"/></url>
<url><loc>https://scifaro.com/en/abs/improving-reverberant-speech-separation-with-multi-stage-training-and-curriculum-learning-2107.09177</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-reverberant-speech-separation-with-multi-stage-training-and-curriculum-learning-2107.09177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-reverberant-speech-separation-with-multi-stage-training-and-curriculum-learning-2107.09177"/></url>
<url><loc>https://scifaro.com/en/abs/svsnet-an-end-to-end-speaker-voice-similarity-assessment-model-2107.09392</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svsnet-an-end-to-end-speaker-voice-similarity-assessment-model-2107.09392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svsnet-an-end-to-end-speaker-voice-similarity-assessment-model-2107.09392"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-end-to-end-asr-based-on-blockwise-non-autoregressive-models-2107.09428</loc><lastmod>2021-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-end-to-end-asr-based-on-blockwise-non-autoregressive-models-2107.09428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-end-to-end-asr-based-on-blockwise-non-autoregressive-models-2107.09428"/></url>
<url><loc>https://scifaro.com/en/abs/audio-captioning-transformer-2107.09817</loc><lastmod>2021-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-captioning-transformer-2107.09817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-captioning-transformer-2107.09817"/></url>
<url><loc>https://scifaro.com/en/abs/cl4ac-a-contrastive-loss-for-audio-captioning-2107.09990</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cl4ac-a-contrastive-loss-for-audio-captioning-2107.09990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cl4ac-a-contrastive-loss-for-audio-captioning-2107.09990"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-sound-generation-using-neural-discrete-time-frequency-representation-learning-2107.09998</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-sound-generation-using-neural-discrete-time-frequency-representation-learning-2107.09998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-sound-generation-using-neural-discrete-time-frequency-representation-learning-2107.09998"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-the-remixing-of-separated-dialogue-with-a-non-intrusive-quality-estimate-2107.10151</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-the-remixing-of-separated-dialogue-with-a-non-intrusive-quality-estimate-2107.10151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-the-remixing-of-separated-dialogue-with-a-non-intrusive-quality-estimate-2107.10151"/></url>
<url><loc>https://scifaro.com/en/abs/what-makes-sound-event-localization-and-detection-difficult-insights-from-error-analysis-2107.10469</loc><lastmod>2021-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-makes-sound-event-localization-and-detection-difficult-insights-from-error-analysis-2107.10469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-makes-sound-event-localization-and-detection-difficult-insights-from-error-analysis-2107.10469"/></url>
<url><loc>https://scifaro.com/en/abs/improving-polyphonic-sound-event-detection-on-multichannel-recordings-with-the-s-o-rensen-dice-coefficient-loss-and-transfer-learning-2107.10471</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-polyphonic-sound-event-detection-on-multichannel-recordings-with-the-s-o-rensen-dice-coefficient-loss-and-transfer-learning-2107.10471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-polyphonic-sound-event-detection-on-multichannel-recordings-with-the-s-o-rensen-dice-coefficient-loss-and-transfer-learning-2107.10471"/></url>
<url><loc>https://scifaro.com/en/abs/controlling-the-perceived-sound-quality-for-dialogue-enhancement-with-deep-learning-2107.10562</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlling-the-perceived-sound-quality-for-dialogue-enhancement-with-deep-learning-2107.10562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlling-the-perceived-sound-quality-for-dialogue-enhancement-with-deep-learning-2107.10562"/></url>
<url><loc>https://scifaro.com/en/abs/a-baseline-model-for-computationally-inexpensive-speech-recognition-for-kazakh-using-the-coqui-stt-framework-2107.10637</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-baseline-model-for-computationally-inexpensive-speech-recognition-for-kazakh-using-the-coqui-stt-framework-2107.10637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-baseline-model-for-computationally-inexpensive-speech-recognition-for-kazakh-using-the-coqui-stt-framework-2107.10637"/></url>
<url><loc>https://scifaro.com/en/abs/digital-einstein-experience-fast-text-to-speech-for-conversational-ai-2107.10658</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digital-einstein-experience-fast-text-to-speech-for-conversational-ai-2107.10658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digital-einstein-experience-fast-text-to-speech-for-conversational-ai-2107.10658"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-classifier-for-just-in-time-woodpeckers-detection-and-deterrent-2107.10676</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-classifier-for-just-in-time-woodpeckers-detection-and-deterrent-2107.10676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-classifier-for-just-in-time-woodpeckers-detection-and-deterrent-2107.10676"/></url>
<url><loc>https://scifaro.com/en/abs/multitask-based-joint-learning-approach-to-robust-asr-for-radio-communication-speech-2107.10701</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitask-based-joint-learning-approach-to-robust-asr-for-radio-communication-speech-2107.10701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitask-based-joint-learning-approach-to-robust-asr-for-radio-communication-speech-2107.10701"/></url>
<url><loc>https://scifaro.com/en/abs/carnelinet-neural-mixture-model-for-automatic-speech-recognition-2107.10708</loc><lastmod>2021-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/carnelinet-neural-mixture-model-for-automatic-speech-recognition-2107.10708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/carnelinet-neural-mixture-model-for-automatic-speech-recognition-2107.10708"/></url>
<url><loc>https://scifaro.com/en/abs/harp-net-hyper-autoencoded-reconstruction-propagation-for-scalable-neural-audio-coding-2107.10843</loc><lastmod>2021-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harp-net-hyper-autoencoded-reconstruction-propagation-for-scalable-neural-audio-coding-2107.10843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harp-net-hyper-autoencoded-reconstruction-propagation-for-scalable-neural-audio-coding-2107.10843"/></url>
<url><loc>https://scifaro.com/en/abs/using-nlp-to-analyze-whether-customer-statements-comply-with-their-inner-belief-2107.11175</loc><lastmod>2021-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-nlp-to-analyze-whether-customer-statements-comply-with-their-inner-belief-2107.11175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-nlp-to-analyze-whether-customer-statements-comply-with-their-inner-belief-2107.11175"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-communications-for-speech-recognition-2107.11190</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-communications-for-speech-recognition-2107.11190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-communications-for-speech-recognition-2107.11190"/></url>
<url><loc>https://scifaro.com/en/abs/use-of-speaker-recognition-approaches-for-learning-and-evaluating-embedding-representations-of-musical-instrument-sounds-2107.11506</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/use-of-speaker-recognition-approaches-for-learning-and-evaluating-embedding-representations-of-musical-instrument-sounds-2107.11506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/use-of-speaker-recognition-approaches-for-learning-and-evaluating-embedding-representations-of-musical-instrument-sounds-2107.11506"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-speech-enhancement-based-on-diffusion-probabilistic-model-2107.11876</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-speech-enhancement-based-on-diffusion-probabilistic-model-2107.11876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-speech-enhancement-based-on-diffusion-probabilistic-model-2107.11876"/></url>
<url><loc>https://scifaro.com/en/abs/inplace-gated-convolutional-recurrent-neural-network-for-dual-channel-speech-enhancement-2107.11968</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inplace-gated-convolutional-recurrent-neural-network-for-dual-channel-speech-enhancement-2107.11968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inplace-gated-convolutional-recurrent-neural-network-for-dual-channel-speech-enhancement-2107.11968"/></url>
<url><loc>https://scifaro.com/en/abs/ur-channel-robust-synthetic-speech-detection-system-for-asvspoof-2021-2107.12018</loc><lastmod>2026-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ur-channel-robust-synthetic-speech-detection-system-for-asvspoof-2021-2107.12018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ur-channel-robust-synthetic-speech-detection-system-for-asvspoof-2021-2107.12018"/></url>
<url><loc>https://scifaro.com/en/abs/adaptation-of-tacotron2-based-text-to-speech-for-articulatory-to-acoustic-mapping-using-ultrasound-tongue-imaging-2107.12051</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptation-of-tacotron2-based-text-to-speech-for-articulatory-to-acoustic-mapping-using-ultrasound-tongue-imaging-2107.12051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptation-of-tacotron2-based-text-to-speech-for-articulatory-to-acoustic-mapping-using-ultrasound-tongue-imaging-2107.12051"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourcing-strong-labels-for-sound-event-detection-2107.12089</loc><lastmod>2021-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourcing-strong-labels-for-sound-event-detection-2107.12089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourcing-strong-labels-for-sound-event-detection-2107.12089"/></url>
<url><loc>https://scifaro.com/en/abs/raw-differentiable-architecture-search-for-speech-deepfake-and-spoofing-detection-2107.12212</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/raw-differentiable-architecture-search-for-speech-deepfake-and-spoofing-detection-2107.12212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/raw-differentiable-architecture-search-for-speech-deepfake-and-spoofing-detection-2107.12212"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-array-generalization-for-multichannel-narrowband-deep-speech-enhancement-2107.12601</loc><lastmod>2021-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-array-generalization-for-multichannel-narrowband-deep-speech-enhancement-2107.12601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-array-generalization-for-multichannel-narrowband-deep-speech-enhancement-2107.12601"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-spectro-temporal-graph-attention-networks-for-speaker-verification-anti-spoofing-and-speech-deepfake-detection-2107.12710</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-spectro-temporal-graph-attention-networks-for-speaker-verification-anti-spoofing-and-speech-deepfake-detection-2107.12710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-spectro-temporal-graph-attention-networks-for-speaker-verification-anti-spoofing-and-speech-deepfake-detection-2107.12710"/></url>
<url><loc>https://scifaro.com/en/abs/a-visual-domain-transfer-learning-approach-for-heartbeat-sound-classification-2107.13237</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-visual-domain-transfer-learning-approach-for-heartbeat-sound-classification-2107.13237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-visual-domain-transfer-learning-approach-for-heartbeat-sound-classification-2107.13237"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-cough-detection-camera-using-enhanced-features-2107.13260</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-cough-detection-camera-using-enhanced-features-2107.13260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-cough-detection-camera-using-enhanced-features-2107.13260"/></url>
<url><loc>https://scifaro.com/en/abs/vowel-based-meeteilon-dialect-identification-using-a-random-forest-classifier-2107.13419</loc><lastmod>2021-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vowel-based-meeteilon-dialect-identification-using-a-random-forest-classifier-2107.13419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vowel-based-meeteilon-dialect-identification-using-a-random-forest-classifier-2107.13419"/></url>
<url><loc>https://scifaro.com/en/abs/an-adapter-based-pre-training-for-efficient-and-scalable-self-supervised-speech-representation-learning-2107.13530</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adapter-based-pre-training-for-efficient-and-scalable-self-supervised-speech-representation-learning-2107.13530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adapter-based-pre-training-for-efficient-and-scalable-self-supervised-speech-representation-learning-2107.13530"/></url>
<url><loc>https://scifaro.com/en/abs/proposal-based-few-shot-sound-event-detection-for-speech-and-environmental-sounds-with-perceivers-2107.13616</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proposal-based-few-shot-sound-event-detection-for-speech-and-environmental-sounds-with-perceivers-2107.13616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proposal-based-few-shot-sound-event-detection-for-speech-and-environmental-sounds-with-perceivers-2107.13616"/></url>
<url><loc>https://scifaro.com/en/abs/don-t-separate-learn-to-remix-end-to-end-neural-remixing-with-joint-optimization-2107.13634</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/don-t-separate-learn-to-remix-end-to-end-neural-remixing-with-joint-optimization-2107.13634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/don-t-separate-learn-to-remix-end-to-end-neural-remixing-with-joint-optimization-2107.13634"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-classroom-activity-detection-from-audio-with-neural-networks-2107.14369</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-classroom-activity-detection-from-audio-with-neural-networks-2107.14369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-classroom-activity-detection-from-audio-with-neural-networks-2107.14369"/></url>
<url><loc>https://scifaro.com/en/abs/usc-an-open-source-uzbek-speech-corpus-and-initial-speech-recognition-experiments-2107.14419</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usc-an-open-source-uzbek-speech-corpus-and-initial-speech-recognition-experiments-2107.14419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usc-an-open-source-uzbek-speech-corpus-and-initial-speech-recognition-experiments-2107.14419"/></url>
<url><loc>https://scifaro.com/en/abs/speeding-up-permutation-invariant-training-for-source-separation-2107.14445</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speeding-up-permutation-invariant-training-for-source-separation-2107.14445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speeding-up-permutation-invariant-training-for-source-separation-2107.14445"/></url>
<url><loc>https://scifaro.com/en/abs/graph-pit-generalized-permutation-invariant-training-for-continuous-separation-of-arbitrary-numbers-of-speakers-2107.14446</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-pit-generalized-permutation-invariant-training-for-continuous-separation-of-arbitrary-numbers-of-speakers-2107.14446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-pit-generalized-permutation-invariant-training-for-continuous-separation-of-arbitrary-numbers-of-speakers-2107.14446"/></url>
<url><loc>https://scifaro.com/en/abs/on-line-audio-to-lyrics-alignment-based-on-a-reference-performance-2107.14496</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-line-audio-to-lyrics-alignment-based-on-a-reference-performance-2107.14496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-line-audio-to-lyrics-alignment-based-on-a-reference-performance-2107.14496"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-head-relevance-weighting-framework-for-learning-raw-waveform-audio-representations-2107.14793</loc><lastmod>2021-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-head-relevance-weighting-framework-for-learning-raw-waveform-audio-representations-2107.14793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-head-relevance-weighting-framework-for-learning-raw-waveform-audio-representations-2107.14793"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-audio-synthesis-and-audio-visual-multimodal-processing-2108.00443</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-audio-synthesis-and-audio-visual-multimodal-processing-2108.00443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-audio-synthesis-and-audio-visual-multimodal-processing-2108.00443"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-feature-learning-of-1d-convolutional-neural-networks-with-contrastive-loss-for-eating-detection-using-an-in-ear-microphone-2108.00769</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-feature-learning-of-1d-convolutional-neural-networks-with-contrastive-loss-for-eating-detection-using-an-in-ear-microphone-2108.00769"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-feature-learning-of-1d-convolutional-neural-networks-with-contrastive-loss-for-eating-detection-using-an-in-ear-microphone-2108.00769"/></url>
<url><loc>https://scifaro.com/en/abs/bite-weight-estimation-using-commercial-ear-buds-2108.00771</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bite-weight-estimation-using-commercial-ear-buds-2108.00771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bite-weight-estimation-using-commercial-ear-buds-2108.00771"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-data-augmentation-for-disordered-speech-recognition-2108.00899</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-data-augmentation-for-disordered-speech-recognition-2108.00899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-data-augmentation-for-disordered-speech-recognition-2108.00899"/></url>
<url><loc>https://scifaro.com/en/abs/robust-acoustic-scene-classification-in-the-presence-of-active-foreground-speech-2108.00912</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-acoustic-scene-classification-in-the-presence-of-active-foreground-speech-2108.00912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-acoustic-scene-classification-in-the-presence-of-active-foreground-speech-2108.00912"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-speaker-information-in-self-supervised-models-to-improve-zero-resource-speech-processing-2108.00917</loc><lastmod>2021-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-speaker-information-in-self-supervised-models-to-improve-zero-resource-speech-processing-2108.00917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-speaker-information-in-self-supervised-models-to-improve-zero-resource-speech-processing-2108.00917"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-multilingual-end-to-end-speech-recognition-for-kazakh-russian-and-english-2108.01280</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-multilingual-end-to-end-speech-recognition-for-kazakh-russian-and-english-2108.01280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-multilingual-end-to-end-speech-recognition-for-kazakh-russian-and-english-2108.01280"/></url>
<url><loc>https://scifaro.com/en/abs/creation-and-detection-of-german-voice-deepfakes-2108.01469</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creation-and-detection-of-german-voice-deepfakes-2108.01469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creation-and-detection-of-german-voice-deepfakes-2108.01469"/></url>
<url><loc>https://scifaro.com/en/abs/amortized-neural-networks-for-low-latency-speech-recognition-2108.01553</loc><lastmod>2021-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/amortized-neural-networks-for-low-latency-speech-recognition-2108.01553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/amortized-neural-networks-for-low-latency-speech-recognition-2108.01553"/></url>
<url><loc>https://scifaro.com/en/abs/learning-a-neural-diff-for-speech-models-2108.01561</loc><lastmod>2021-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-a-neural-diff-for-speech-models-2108.01561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-a-neural-diff-for-speech-models-2108.01561"/></url>
<url><loc>https://scifaro.com/en/abs/bifocal-neural-asr-exploiting-keyword-spotting-for-inference-optimization-2108.01704</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bifocal-neural-asr-exploiting-keyword-spotting-for-inference-optimization-2108.01704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bifocal-neural-asr-exploiting-keyword-spotting-for-inference-optimization-2108.01704"/></url>
<url><loc>https://scifaro.com/en/abs/blind-and-neural-network-guided-convolutional-beamformer-for-joint-denoising-dereverberation-and-source-separation-2108.01836</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-and-neural-network-guided-convolutional-beamformer-for-joint-denoising-dereverberation-and-source-separation-2108.01836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-and-neural-network-guided-convolutional-beamformer-for-joint-denoising-dereverberation-and-source-separation-2108.01836"/></url>
<url><loc>https://scifaro.com/en/abs/lung-sound-classification-using-co-tuning-and-stochastic-normalization-2108.01991</loc><lastmod>2021-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lung-sound-classification-using-co-tuning-and-stochastic-normalization-2108.01991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lung-sound-classification-using-co-tuning-and-stochastic-normalization-2108.01991"/></url>
<url><loc>https://scifaro.com/en/abs/an-encoder-decoder-based-audio-captioning-system-with-transfer-and-reinforcement-learning-2108.02752</loc><lastmod>2021-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-encoder-decoder-based-audio-captioning-system-with-transfer-and-reinforcement-learning-2108.02752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-encoder-decoder-based-audio-captioning-system-with-transfer-and-reinforcement-learning-2108.02752"/></url>
<url><loc>https://scifaro.com/en/abs/sinsy-a-deep-neural-network-based-singing-voice-synthesis-system-2108.02776</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sinsy-a-deep-neural-network-based-singing-voice-synthesis-system-2108.02776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sinsy-a-deep-neural-network-based-singing-voice-synthesis-system-2108.02776"/></url>
<url><loc>https://scifaro.com/en/abs/applying-the-information-bottleneck-principle-to-prosodic-representation-learning-2108.02821</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-the-information-bottleneck-principle-to-prosodic-representation-learning-2108.02821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-the-information-bottleneck-principle-to-prosodic-representation-learning-2108.02821"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-domain-adaptation-in-speech-recognition-using-phonetic-features-2108.02850</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-in-speech-recognition-using-phonetic-features-2108.02850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-domain-adaptation-in-speech-recognition-using-phonetic-features-2108.02850"/></url>
<url><loc>https://scifaro.com/en/abs/deep-residual-echo-suppression-and-noise-reduction-a-multi-input-fcrn-approach-in-a-hybrid-speech-enhancement-system-2108.03051</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-residual-echo-suppression-and-noise-reduction-a-multi-input-fcrn-approach-in-a-hybrid-speech-enhancement-system-2108.03051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-residual-echo-suppression-and-noise-reduction-a-multi-input-fcrn-approach-in-a-hybrid-speech-enhancement-system-2108.03051"/></url>
<url><loc>https://scifaro.com/en/abs/complex-valued-spatial-autoencoders-for-multichannel-speech-enhancement-2108.03130</loc><lastmod>2021-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-valued-spatial-autoencoders-for-multichannel-speech-enhancement-2108.03130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-valued-spatial-autoencoders-for-multichannel-speech-enhancement-2108.03130"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-improved-i-vector-estimation-for-unknown-number-of-speaker-2108.03342</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-improved-i-vector-estimation-for-unknown-number-of-speaker-2108.03342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-improved-i-vector-estimation-for-unknown-number-of-speaker-2108.03342"/></url>
<url><loc>https://scifaro.com/en/abs/beatnet-crnn-and-particle-filtering-for-online-joint-beat-downbeat-and-meter-tracking-2108.03576</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beatnet-crnn-and-particle-filtering-for-online-joint-beat-downbeat-and-meter-tracking-2108.03576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beatnet-crnn-and-particle-filtering-for-online-joint-beat-downbeat-and-meter-tracking-2108.03576"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-with-joint-dereverberation-of-sub-band-autoregressive-envelopes-2108.03975</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-joint-dereverberation-of-sub-band-autoregressive-envelopes-2108.03975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-joint-dereverberation-of-sub-band-autoregressive-envelopes-2108.03975"/></url>
<url><loc>https://scifaro.com/en/abs/a-streamwise-gan-vocoder-for-wideband-speech-coding-at-very-low-bit-rate-2108.04051</loc><lastmod>2021-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-streamwise-gan-vocoder-for-wideband-speech-coding-at-very-low-bit-rate-2108.04051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-streamwise-gan-vocoder-for-wideband-speech-coding-at-very-low-bit-rate-2108.04051"/></url>
<url><loc>https://scifaro.com/en/abs/masked-acoustic-unit-for-mispronunciation-detection-and-correction-2108.05517</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-acoustic-unit-for-mispronunciation-detection-and-correction-2108.05517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-acoustic-unit-for-mispronunciation-detection-and-correction-2108.05517"/></url>
<url><loc>https://scifaro.com/en/abs/dereverberation-of-autoregressive-envelopes-for-far-field-speech-recognition-2108.05520</loc><lastmod>2021-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dereverberation-of-autoregressive-envelopes-for-far-field-speech-recognition-2108.05520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dereverberation-of-autoregressive-envelopes-for-far-field-speech-recognition-2108.05520"/></url>
<url><loc>https://scifaro.com/en/abs/xi-vector-embedding-for-speaker-recognition-2108.05679</loc><lastmod>2021-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xi-vector-embedding-for-speaker-recognition-2108.05679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xi-vector-embedding-for-speaker-recognition-2108.05679"/></url>
<url><loc>https://scifaro.com/en/abs/joint-spatio-temporal-discretisation-of-nonlinear-active-cochlear-models-2108.05993</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-spatio-temporal-discretisation-of-nonlinear-active-cochlear-models-2108.05993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-spatio-temporal-discretisation-of-nonlinear-active-cochlear-models-2108.05993"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-training-set-selection-for-asr-in-under-resourced-malian-languages-2108.06164</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-training-set-selection-for-asr-in-under-resourced-malian-languages-2108.06164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-training-set-selection-for-asr-in-under-resourced-malian-languages-2108.06164"/></url>
<url><loc>https://scifaro.com/en/abs/feature-learning-for-efficient-asr-free-keyword-spotting-in-low-resource-languages-2108.06174</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-learning-for-efficient-asr-free-keyword-spotting-in-low-resource-languages-2108.06174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-learning-for-efficient-asr-free-keyword-spotting-in-low-resource-languages-2108.06174"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audio-quality-for-expressive-neural-text-to-speech-2108.06270</loc><lastmod>2021-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audio-quality-for-expressive-neural-text-to-speech-2108.06270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audio-quality-for-expressive-neural-text-to-speech-2108.06270"/></url>
<url><loc>https://scifaro.com/en/abs/gc-tts-few-shot-speaker-adaptation-with-geometric-constraints-2108.06890</loc><lastmod>2021-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gc-tts-few-shot-speaker-adaptation-with-geometric-constraints-2108.06890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gc-tts-few-shot-speaker-adaptation-with-geometric-constraints-2108.06890"/></url>
<url><loc>https://scifaro.com/en/abs/language-independent-approach-for-automatic-computation-of-vowel-articulation-features-in-dysarthric-speech-assessment-2108.06943</loc><lastmod>2021-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-independent-approach-for-automatic-computation-of-vowel-articulation-features-in-dysarthric-speech-assessment-2108.06943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-independent-approach-for-automatic-computation-of-vowel-articulation-features-in-dysarthric-speech-assessment-2108.06943"/></url>
<url><loc>https://scifaro.com/en/abs/precision-and-accuracy-of-acoustic-gunshot-location-in-an-urban-environment-2108.07377</loc><lastmod>2021-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/precision-and-accuracy-of-acoustic-gunshot-location-in-an-urban-environment-2108.07377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/precision-and-accuracy-of-acoustic-gunshot-location-in-an-urban-environment-2108.07377"/></url>
<url><loc>https://scifaro.com/en/abs/fdn-finite-difference-network-with-hierarchical-convolutional-features-for-text-independent-speaker-verification-2108.07974</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fdn-finite-difference-network-with-hierarchical-convolutional-features-for-text-independent-speaker-verification-2108.07974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fdn-finite-difference-network-with-hierarchical-convolutional-features-for-text-independent-speaker-verification-2108.07974"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-level-acoustic-feature-extraction-framework-for-transformer-based-end-to-end-speech-recognition-2108.07980</loc><lastmod>2022-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-level-acoustic-feature-extraction-framework-for-transformer-based-end-to-end-speech-recognition-2108.07980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-level-acoustic-feature-extraction-framework-for-transformer-based-end-to-end-speech-recognition-2108.07980"/></url>
<url><loc>https://scifaro.com/en/abs/chmusic-a-traditional-chinese-music-dataset-for-evaluation-of-instrument-recognition-2108.08470</loc><lastmod>2021-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chmusic-a-traditional-chinese-music-dataset-for-evaluation-of-instrument-recognition-2108.08470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chmusic-a-traditional-chinese-music-dataset-for-evaluation-of-instrument-recognition-2108.08470"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-pseudo-multilabel-2108.08663</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-pseudo-multilabel-2108.08663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-cross-lingual-speech-emotion-recognition-using-pseudo-multilabel-2108.08663"/></url>
<url><loc>https://scifaro.com/en/abs/more-for-less-non-intrusive-speech-quality-assessment-with-limited-annotations-2108.08745</loc><lastmod>2021-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/more-for-less-non-intrusive-speech-quality-assessment-with-limited-annotations-2108.08745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/more-for-less-non-intrusive-speech-quality-assessment-with-limited-annotations-2108.08745"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-the-assessment-of-infant-vocalizations-by-laypersons-2108.09205</loc><lastmod>2021-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-the-assessment-of-infant-vocalizations-by-laypersons-2108.09205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-the-assessment-of-infant-vocalizations-by-laypersons-2108.09205"/></url>
<url><loc>https://scifaro.com/en/abs/using-large-pre-trained-models-with-cross-modal-attention-for-multi-modal-emotion-recognition-2108.09669</loc><lastmod>2021-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-large-pre-trained-models-with-cross-modal-attention-for-multi-modal-emotion-recognition-2108.09669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-large-pre-trained-models-with-cross-modal-attention-for-multi-modal-emotion-recognition-2108.09669"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-2108.10357</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-2108.10357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-2108.10357"/></url>
<url><loc>https://scifaro.com/en/abs/learning-sparse-analytic-filters-for-piano-transcription-2108.10382</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-sparse-analytic-filters-for-piano-transcription-2108.10382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-sparse-analytic-filters-for-piano-transcription-2108.10382"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-lightweight-acoustic-curtains-for-mid-to-high-frequency-noise-insulations-2108.10683</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-lightweight-acoustic-curtains-for-mid-to-high-frequency-noise-insulations-2108.10683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-lightweight-acoustic-curtains-for-mid-to-high-frequency-noise-insulations-2108.10683"/></url>
<url><loc>https://scifaro.com/en/abs/scorpiano-a-system-for-automatic-music-transcription-for-monophonic-piano-music-2108.10689</loc><lastmod>2021-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scorpiano-a-system-for-automatic-music-transcription-for-monophonic-piano-music-2108.10689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scorpiano-a-system-for-automatic-music-transcription-for-monophonic-piano-music-2108.10689"/></url>
<url><loc>https://scifaro.com/en/abs/curricular-sincnet-towards-robust-deep-speaker-recognition-by-emphasizing-hard-samples-in-latent-space-2108.10714</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/curricular-sincnet-towards-robust-deep-speaker-recognition-by-emphasizing-hard-samples-in-latent-space-2108.10714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/curricular-sincnet-towards-robust-deep-speaker-recognition-by-emphasizing-hard-samples-in-latent-space-2108.10714"/></url>
<url><loc>https://scifaro.com/en/abs/generalizing-rnn-transducer-to-out-domain-audio-via-sparse-self-attention-layers-2108.10752</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalizing-rnn-transducer-to-out-domain-audio-via-sparse-self-attention-layers-2108.10752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalizing-rnn-transducer-to-out-domain-audio-via-sparse-self-attention-layers-2108.10752"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-envelope-and-fine-structure-cues-for-dysarthric-speech-detection-using-cnns-2108.11153</loc><lastmod>2021-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-envelope-and-fine-structure-cues-for-dysarthric-speech-detection-using-cnns-2108.11153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-envelope-and-fine-structure-cues-for-dysarthric-speech-detection-using-cnns-2108.11153"/></url>
<url><loc>https://scifaro.com/en/abs/with-one-voice-composing-a-travel-voice-assistant-from-re-purposed-models-2108.11463</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/with-one-voice-composing-a-travel-voice-assistant-from-re-purposed-models-2108.11463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/with-one-voice-composing-a-travel-voice-assistant-from-re-purposed-models-2108.11463"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-single-channel-speech-enhancement-model-with-bi-projection-fusion-module-for-noise-robust-asr-2108.11598</loc><lastmod>2021-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-single-channel-speech-enhancement-model-with-bi-projection-fusion-module-for-noise-robust-asr-2108.11598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-single-channel-speech-enhancement-model-with-bi-projection-fusion-module-for-noise-robust-asr-2108.11598"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-learning-loss-function-based-on-auditory-power-compression-for-speech-enhancement-2108.11877</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-learning-loss-function-based-on-auditory-power-compression-for-speech-enhancement-2108.11877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-learning-loss-function-based-on-auditory-power-compression-for-speech-enhancement-2108.11877"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representations-and-phoneme-classification-for-preserving-the-endangered-language-of-ladin-2108.12531</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representations-and-phoneme-classification-for-preserving-the-endangered-language-of-ladin-2108.12531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representations-and-phoneme-classification-for-preserving-the-endangered-language-of-ladin-2108.12531"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-transformer-transducer-for-speech-recognition-2108.12953</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-transformer-transducer-for-speech-recognition-2108.12953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-transformer-transducer-for-speech-recognition-2108.12953"/></url>
<url><loc>https://scifaro.com/en/abs/inse-net-a-perceptually-coded-audio-quality-model-based-on-cnn-2108.13087</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inse-net-a-perceptually-coded-audio-quality-model-based-on-cnn-2108.13087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inse-net-a-perceptually-coded-audio-quality-model-based-on-cnn-2108.13087"/></url>
<url><loc>https://scifaro.com/en/abs/neural-hmms-are-all-you-need-for-high-quality-attention-free-tts-2108.13320</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-hmms-are-all-you-need-for-high-quality-attention-free-tts-2108.13320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-hmms-are-all-you-need-for-high-quality-attention-free-tts-2108.13320"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-bandwidth-extension-using-deep-neural-network-and-h-infty-sampled-data-control-theory-2108.13326</loc><lastmod>2021-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-bandwidth-extension-using-deep-neural-network-and-h-infty-sampled-data-control-theory-2108.13326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-bandwidth-extension-using-deep-neural-network-and-h-infty-sampled-data-control-theory-2108.13326"/></url>
<url><loc>https://scifaro.com/en/abs/music-demixing-challenge-2021-2108.13559</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-demixing-challenge-2021-2108.13559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-demixing-challenge-2021-2108.13559"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-f1-score-training-for-end-to-end-mispronunciation-detection-and-diagnosis-of-l2-english-speech-2108.13816</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-f1-score-training-for-end-to-end-mispronunciation-detection-and-diagnosis-of-l2-english-speech-2108.13816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-f1-score-training-for-end-to-end-mispronunciation-detection-and-diagnosis-of-l2-english-speech-2108.13816"/></url>
<url><loc>https://scifaro.com/en/abs/neural-sequence-to-sequence-speech-synthesis-using-a-hidden-semi-markov-model-based-structured-attention-mechanism-2108.13985</loc><lastmod>2021-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-sequence-to-sequence-speech-synthesis-using-a-hidden-semi-markov-model-based-structured-attention-mechanism-2108.13985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-sequence-to-sequence-speech-synthesis-using-a-hidden-semi-markov-model-based-structured-attention-mechanism-2108.13985"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2021-automatic-speaker-verification-spoofing-and-countermeasures-challenge-evaluation-plan-2109.00535</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2021-automatic-speaker-verification-spoofing-and-countermeasures-challenge-evaluation-plan-2109.00535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2021-automatic-speaker-verification-spoofing-and-countermeasures-challenge-evaluation-plan-2109.00535"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-2021-accelerating-progress-in-spoofed-and-deepfake-speech-detection-2109.00537</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-2021-accelerating-progress-in-spoofed-and-deepfake-speech-detection-2109.00537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-2021-accelerating-progress-in-spoofed-and-deepfake-speech-detection-2109.00537"/></url>
<url><loc>https://scifaro.com/en/abs/physiological-physical-feature-fusion-for-automatic-voice-spoofing-detection-2109.00913</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physiological-physical-feature-fusion-for-automatic-voice-spoofing-detection-2109.00913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physiological-physical-feature-fusion-for-automatic-voice-spoofing-detection-2109.00913"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-retraining-free-speech-recognition-for-intra-sentential-code-switching-2109.00921</loc><lastmod>2021-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-retraining-free-speech-recognition-for-intra-sentential-code-switching-2109.00921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-retraining-free-speech-recognition-for-intra-sentential-code-switching-2109.00921"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioned-hierarchical-modeling-for-automated-speech-scoring-2109.00928</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioned-hierarchical-modeling-for-automated-speech-scoring-2109.00928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioned-hierarchical-modeling-for-automated-speech-scoring-2109.00928"/></url>
<url><loc>https://scifaro.com/en/abs/you-only-hear-once-a-yolo-like-algorithm-for-audio-segmentation-and-sound-event-detection-2109.00962</loc><lastmod>2022-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/you-only-hear-once-a-yolo-like-algorithm-for-audio-segmentation-and-sound-event-detection-2109.00962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/you-only-hear-once-a-yolo-like-algorithm-for-audio-segmentation-and-sound-event-detection-2109.00962"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-conformer-progressive-downsampling-and-grouped-attention-for-automatic-speech-recognition-2109.01163</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-conformer-progressive-downsampling-and-grouped-attention-for-automatic-speech-recognition-2109.01163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-conformer-progressive-downsampling-and-grouped-attention-for-automatic-speech-recognition-2109.01163"/></url>
<url><loc>https://scifaro.com/en/abs/scalable-data-annotation-pipeline-for-high-quality-large-speech-datasets-development-2109.01164</loc><lastmod>2021-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scalable-data-annotation-pipeline-for-high-quality-large-speech-datasets-development-2109.01164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scalable-data-annotation-pipeline-for-high-quality-large-speech-datasets-development-2109.01164"/></url>
<url><loc>https://scifaro.com/en/abs/phone-duration-modeling-for-speaker-age-estimation-in-children-2109.01568</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phone-duration-modeling-for-speaker-age-estimation-in-children-2109.01568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phone-duration-modeling-for-speaker-age-estimation-in-children-2109.01568"/></url>
<url><loc>https://scifaro.com/en/abs/musical-tempo-estimation-using-a-multi-scale-network-2109.01607</loc><lastmod>2021-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-tempo-estimation-using-a-multi-scale-network-2109.01607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-tempo-estimation-using-a-multi-scale-network-2109.01607"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-dukeece-lenovo-system-for-the-diarization-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02002</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-dukeece-lenovo-system-for-the-diarization-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-dukeece-lenovo-system-for-the-diarization-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02002"/></url>
<url><loc>https://scifaro.com/en/abs/xmuspeech-system-for-voxceleb-speaker-recognition-challenge-2021-2109.02549</loc><lastmod>2021-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xmuspeech-system-for-voxceleb-speaker-recognition-challenge-2021-2109.02549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xmuspeech-system-for-voxceleb-speaker-recognition-challenge-2021-2109.02549"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-identification-for-shared-devices-by-adapting-embeddings-to-speaker-subsets-2109.02576</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-identification-for-shared-devices-by-adapting-embeddings-to-speaker-subsets-2109.02576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-identification-for-shared-devices-by-adapting-embeddings-to-speaker-subsets-2109.02576"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-self-supervision-speaker-verification-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02853</loc><lastmod>2021-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-self-supervision-speaker-verification-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02853"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-self-supervision-speaker-verification-task-of-the-2021-voxceleb-speaker-recognition-challenge-2109.02853"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-non-negative-matrix-co-factorisation-approach-for-noisy-neonatal-chest-sound-separation-2109.03275</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-non-negative-matrix-co-factorisation-approach-for-noisy-neonatal-chest-sound-separation-2109.03275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-non-negative-matrix-co-factorisation-approach-for-noisy-neonatal-chest-sound-separation-2109.03275"/></url>
<url><loc>https://scifaro.com/en/abs/referee-towards-reference-free-cross-speaker-style-transfer-with-low-quality-data-for-expressive-speech-synthesis-2109.03439</loc><lastmod>2021-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/referee-towards-reference-free-cross-speaker-style-transfer-with-low-quality-data-for-expressive-speech-synthesis-2109.03439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/referee-towards-reference-free-cross-speaker-style-transfer-with-low-quality-data-for-expressive-speech-synthesis-2109.03439"/></url>
<url><loc>https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2021-system-description-2109.04070</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2021-system-description-2109.04070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-idlab-voxceleb-speaker-recognition-challenge-2021-system-description-2109.04070"/></url>
<url><loc>https://scifaro.com/en/abs/robust-single-and-multi-loudspeaker-least-squares-based-equalization-for-hearing-devices-2109.04241</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-single-and-multi-loudspeaker-least-squares-based-equalization-for-hearing-devices-2109.04241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-single-and-multi-loudspeaker-least-squares-based-equalization-for-hearing-devices-2109.04241"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-end-to-end-speech-translation-with-parallel-autoregressive-rescoring-2109.04411</loc><lastmod>2021-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-end-to-end-speech-translation-with-parallel-autoregressive-rescoring-2109.04411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-end-to-end-speech-translation-with-parallel-autoregressive-rescoring-2109.04411"/></url>
<url><loc>https://scifaro.com/en/abs/directional-mclp-analysis-and-reconstruction-for-spatial-speech-communication-2109.04544</loc><lastmod>2021-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directional-mclp-analysis-and-reconstruction-for-spatial-speech-communication-2109.04544"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directional-mclp-analysis-and-reconstruction-for-spatial-speech-communication-2109.04544"/></url>
<url><loc>https://scifaro.com/en/abs/large-vocabulary-audio-visual-speech-recognition-in-noisy-environments-2109.04894</loc><lastmod>2021-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-vocabulary-audio-visual-speech-recognition-in-noisy-environments-2109.04894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-vocabulary-audio-visual-speech-recognition-in-noisy-environments-2109.04894"/></url>
<url><loc>https://scifaro.com/en/abs/remember-the-context-asr-slot-error-correction-through-memorization-2109.05092</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remember-the-context-asr-slot-error-correction-through-memorization-2109.05092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remember-the-context-asr-slot-error-correction-through-memorization-2109.05092"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-real-world-noisy-speech-in-neural-network-based-speech-enhancement-systems-2109.05172</loc><lastmod>2021-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-real-world-noisy-speech-in-neural-network-based-speech-enhancement-systems-2109.05172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-real-world-noisy-speech-in-neural-network-based-speech-enhancement-systems-2109.05172"/></url>
<url><loc>https://scifaro.com/en/abs/studying-squeeze-and-excitation-used-in-cnn-for-speaker-verification-2109.05977</loc><lastmod>2021-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/studying-squeeze-and-excitation-used-in-cnn-for-speaker-verification-2109.05977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/studying-squeeze-and-excitation-used-in-cnn-for-speaker-verification-2109.05977"/></url>
<url><loc>https://scifaro.com/en/abs/in-filter-computing-for-designing-ultra-light-acoustic-pattern-recognizers-2109.06171</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-filter-computing-for-designing-ultra-light-acoustic-pattern-recognizers-2109.06171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-filter-computing-for-designing-ultra-light-acoustic-pattern-recognizers-2109.06171"/></url>
<url><loc>https://scifaro.com/en/abs/overlap-aware-low-latency-online-speaker-diarization-based-on-end-to-end-local-segmentation-2109.06483</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overlap-aware-low-latency-online-speaker-diarization-based-on-end-to-end-local-segmentation-2109.06483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overlap-aware-low-latency-online-speaker-diarization-based-on-end-to-end-local-segmentation-2109.06483"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-metric-learning-with-graph-clustering-for-speaker-diarization-2109.06824</loc><lastmod>2021-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-metric-learning-with-graph-clustering-for-speaker-diarization-2109.06824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-metric-learning-with-graph-clustering-for-speaker-diarization-2109.06824"/></url>
<url><loc>https://scifaro.com/en/abs/fairseq-s-2-a-scalable-and-integrable-speech-synthesis-toolkit-2109.06912</loc><lastmod>2021-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fairseq-s-2-a-scalable-and-integrable-speech-synthesis-toolkit-2109.06912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fairseq-s-2-a-scalable-and-integrable-speech-synthesis-toolkit-2109.06912"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-rendering-from-microphone-array-signals-of-arbitrary-geometry-2109.07274</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-rendering-from-microphone-array-signals-of-arbitrary-geometry-2109.07274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-rendering-from-microphone-array-signals-of-arbitrary-geometry-2109.07274"/></url>
<url><loc>https://scifaro.com/en/abs/improving-streaming-transformer-based-asr-under-a-framework-of-self-supervised-learning-2109.07327</loc><lastmod>2021-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-streaming-transformer-based-asr-under-a-framework-of-self-supervised-learning-2109.07327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-streaming-transformer-based-asr-under-a-framework-of-self-supervised-learning-2109.07327"/></url>
<url><loc>https://scifaro.com/en/abs/improving-accent-identification-and-accented-speech-recognition-under-a-framework-of-self-supervised-learning-2109.07349</loc><lastmod>2021-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-accent-identification-and-accented-speech-recognition-under-a-framework-of-self-supervised-learning-2109.07349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-accent-identification-and-accented-speech-recognition-under-a-framework-of-self-supervised-learning-2109.07349"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-level-neural-confidence-measure-for-end-to-end-children-speech-recognition-2109.07750</loc><lastmod>2021-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-level-neural-confidence-measure-for-end-to-end-children-speech-recognition-2109.07750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-level-neural-confidence-measure-for-end-to-end-children-speech-recognition-2109.07750"/></url>
<url><loc>https://scifaro.com/en/abs/fser-deep-convolutional-neural-networks-for-speech-emotion-recognition-2109.07916</loc><lastmod>2021-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fser-deep-convolutional-neural-networks-for-speech-emotion-recognition-2109.07916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fser-deep-convolutional-neural-networks-for-speech-emotion-recognition-2109.07916"/></url>
<url><loc>https://scifaro.com/en/abs/behavior-of-keyword-spotting-networks-under-noisy-conditions-2109.07930</loc><lastmod>2021-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/behavior-of-keyword-spotting-networks-under-noisy-conditions-2109.07930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/behavior-of-keyword-spotting-networks-under-noisy-conditions-2109.07930"/></url>
<url><loc>https://scifaro.com/en/abs/dds-a-new-device-degraded-speech-dataset-for-speech-enhancement-2109.07931</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dds-a-new-device-degraded-speech-dataset-for-speech-enhancement-2109.07931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dds-a-new-device-degraded-speech-dataset-for-speech-enhancement-2109.07931"/></url>
<url><loc>https://scifaro.com/en/abs/pdaugment-data-augmentation-by-pitch-and-duration-adjustments-for-automatic-lyrics-transcription-2109.07940</loc><lastmod>2021-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pdaugment-data-augmentation-by-pitch-and-duration-adjustments-for-automatic-lyrics-transcription-2109.07940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pdaugment-data-augmentation-by-pitch-and-duration-adjustments-for-automatic-lyrics-transcription-2109.07940"/></url>
<url><loc>https://scifaro.com/en/abs/noresqa-a-framework-for-speech-quality-assessment-using-non-matching-references-2109.08125</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noresqa-a-framework-for-speech-quality-assessment-using-non-matching-references-2109.08125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noresqa-a-framework-for-speech-quality-assessment-using-non-matching-references-2109.08125"/></url>
<url><loc>https://scifaro.com/en/abs/fast-slow-transformer-for-visually-grounding-speech-2109.08186</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-slow-transformer-for-visually-grounding-speech-2109.08186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-slow-transformer-for-visually-grounding-speech-2109.08186"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-streaming-multi-talker-asr-with-dual-path-transducers-2109.08555</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-streaming-multi-talker-asr-with-dual-path-transducers-2109.08555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-streaming-multi-talker-asr-with-dual-path-transducers-2109.08555"/></url>
<url><loc>https://scifaro.com/en/abs/on-device-neural-speech-synthesis-2109.08710</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-device-neural-speech-synthesis-2109.08710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-device-neural-speech-synthesis-2109.08710"/></url>
<url><loc>https://scifaro.com/en/abs/dual-encoder-architecture-with-encoder-selection-for-joint-close-talk-and-far-talk-speech-recognition-2109.08744</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-encoder-architecture-with-encoder-selection-for-joint-close-talk-and-far-talk-speech-recognition-2109.08744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-encoder-architecture-with-encoder-selection-for-joint-close-talk-and-far-talk-speech-recognition-2109.08744"/></url>
<url><loc>https://scifaro.com/en/abs/fast-query-by-example-speech-search-using-separable-model-2109.08870</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-query-by-example-speech-search-using-separable-model-2109.08870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-query-by-example-speech-search-using-separable-model-2109.08870"/></url>
<url><loc>https://scifaro.com/en/abs/improving-text-independent-speaker-verification-with-auxiliary-speakers-using-graph-2109.09674</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-text-independent-speaker-verification-with-auxiliary-speakers-using-graph-2109.09674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-text-independent-speaker-verification-with-auxiliary-speakers-using-graph-2109.09674"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-echo-cancellation-using-residual-u-nets-2109.09686</loc><lastmod>2021-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-using-residual-u-nets-2109.09686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-echo-cancellation-using-residual-u-nets-2109.09686"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-clinical-utility-of-machine-learning-and-artificial-intelligence-approaches-to-analyze-speech-recordings-in-multiple-sclerosis-a-pilot-study-2109.09844</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-clinical-utility-of-machine-learning-and-artificial-intelligence-approaches-to-analyze-speech-recordings-in-multiple-sclerosis-a-pilot-study-2109.09844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-clinical-utility-of-machine-learning-and-artificial-intelligence-approaches-to-analyze-speech-recordings-in-multiple-sclerosis-a-pilot-study-2109.09844"/></url>
<url><loc>https://scifaro.com/en/abs/masks-fusion-with-multi-target-learning-for-speech-enhancement-2109.11164</loc><lastmod>2021-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masks-fusion-with-multi-target-learning-for-speech-enhancement-2109.11164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masks-fusion-with-multi-target-learning-for-speech-enhancement-2109.11164"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-dynamic-filter-for-keyword-spotting-2109.11165</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-dynamic-filter-for-keyword-spotting-2109.11165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-dynamic-filter-for-keyword-spotting-2109.11165"/></url>
<url><loc>https://scifaro.com/en/abs/channelaugment-improving-generalization-of-multi-channel-asr-by-training-with-input-channel-randomization-2109.11225</loc><lastmod>2021-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channelaugment-improving-generalization-of-multi-channel-asr-by-training-with-input-channel-randomization-2109.11225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channelaugment-improving-generalization-of-multi-channel-asr-by-training-with-input-channel-randomization-2109.11225"/></url>
<url><loc>https://scifaro.com/en/abs/turn-to-diarize-online-speaker-diarization-constrained-by-transformer-transducer-speaker-turn-detection-2109.11641</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/turn-to-diarize-online-speaker-diarization-constrained-by-transformer-transducer-speaker-turn-detection-2109.11641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/turn-to-diarize-online-speaker-diarization-constrained-by-transformer-transducer-speaker-turn-detection-2109.11641"/></url>
<url><loc>https://scifaro.com/en/abs/fast-md-fast-multi-decoder-end-to-end-speech-translation-with-non-autoregressive-hidden-intermediates-2109.12804</loc><lastmod>2021-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-md-fast-multi-decoder-end-to-end-speech-translation-with-non-autoregressive-hidden-intermediates-2109.12804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-md-fast-multi-decoder-end-to-end-speech-translation-with-non-autoregressive-hidden-intermediates-2109.12804"/></url>
<url><loc>https://scifaro.com/en/abs/bigssl-exploring-the-frontier-of-large-scale-semi-supervised-learning-for-automatic-speech-recognition-2109.13226</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigssl-exploring-the-frontier-of-large-scale-semi-supervised-learning-for-automatic-speech-recognition-2109.13226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigssl-exploring-the-frontier-of-large-scale-semi-supervised-learning-for-automatic-speech-recognition-2109.13226"/></url>
<url><loc>https://scifaro.com/en/abs/the-jhu-submission-to-voxsrc-21-track-3-2109.13425</loc><lastmod>2021-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-jhu-submission-to-voxsrc-21-track-3-2109.13425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-jhu-submission-to-voxsrc-21-track-3-2109.13425"/></url>
<url><loc>https://scifaro.com/en/abs/north-america-bixby-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.13518</loc><lastmod>2021-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/north-america-bixby-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.13518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/north-america-bixby-speaker-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2021-2109.13518"/></url>
<url><loc>https://scifaro.com/en/abs/msr-nv-neural-vocoder-using-multiple-sampling-rates-2109.13714</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msr-nv-neural-vocoder-using-multiple-sampling-rates-2109.13714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msr-nv-neural-vocoder-using-multiple-sampling-rates-2109.13714"/></url>
<url><loc>https://scifaro.com/en/abs/articulatory-coordination-for-speech-motor-tracking-in-huntington-disease-2109.13815</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/articulatory-coordination-for-speech-motor-tracking-in-huntington-disease-2109.13815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/articulatory-coordination-for-speech-motor-tracking-in-huntington-disease-2109.13815"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-non-target-events-in-synthetic-soundscapes-for-sound-event-detection-2109.14061</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-non-target-events-in-synthetic-soundscapes-for-sound-event-detection-2109.14061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-non-target-events-in-synthetic-soundscapes-for-sound-event-detection-2109.14061"/></url>
<url><loc>https://scifaro.com/en/abs/can-phones-syllables-and-words-emerge-as-side-products-of-cross-situational-audiovisual-learning-a-computational-investigation-2109.14200</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-phones-syllables-and-words-emerge-as-side-products-of-cross-situational-audiovisual-learning-a-computational-investigation-2109.14200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-phones-syllables-and-words-emerge-as-side-products-of-cross-situational-audiovisual-learning-a-computational-investigation-2109.14200"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-self-supervised-speech-pre-training-methods-on-flemish-dutch-2109.14357</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-self-supervised-speech-pre-training-methods-on-flemish-dutch-2109.14357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-self-supervised-speech-pre-training-methods-on-flemish-dutch-2109.14357"/></url>
<url><loc>https://scifaro.com/en/abs/objective-oriented-method-for-uniformation-of-various-directivity-representations-2109.14370</loc><lastmod>2021-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-oriented-method-for-uniformation-of-various-directivity-representations-2109.14370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-oriented-method-for-uniformation-of-various-directivity-representations-2109.14370"/></url>
<url><loc>https://scifaro.com/en/abs/a-universal-deep-room-acoustics-estimator-2109.14436</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-universal-deep-room-acoustics-estimator-2109.14436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-universal-deep-room-acoustics-estimator-2109.14436"/></url>
<url><loc>https://scifaro.com/en/abs/usev-universal-speaker-extraction-with-visual-cue-2109.14831</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usev-universal-speaker-extraction-with-visual-cue-2109.14831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usev-universal-speaker-extraction-with-visual-cue-2109.14831"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-pre-upsampling-generative-modelling-and-generative-adversarial-networks-in-audio-super-resolution-2109.14994</loc><lastmod>2021-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-pre-upsampling-generative-modelling-and-generative-adversarial-networks-in-audio-super-resolution-2109.14994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-pre-upsampling-generative-modelling-and-generative-adversarial-networks-in-audio-super-resolution-2109.14994"/></url>
<url><loc>https://scifaro.com/en/abs/federated-learning-in-asr-not-as-easy-as-you-think-2109.15108</loc><lastmod>2021-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-learning-in-asr-not-as-easy-as-you-think-2109.15108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-learning-in-asr-not-as-easy-as-you-think-2109.15108"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-multi-level-neonatal-heart-and-lung-sound-quality-assessment-for-telehealth-applications-2109.15127</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-multi-level-neonatal-heart-and-lung-sound-quality-assessment-for-telehealth-applications-2109.15127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-multi-level-neonatal-heart-and-lung-sound-quality-assessment-for-telehealth-applications-2109.15127"/></url>
<url><loc>https://scifaro.com/en/abs/portaspeech-portable-and-high-quality-generative-text-to-speech-2109.15166</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/portaspeech-portable-and-high-quality-generative-text-to-speech-2109.15166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/portaspeech-portable-and-high-quality-generative-text-to-speech-2109.15166"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-asr-domain-adaptation-using-self-and-semi-supervised-learning-2110.00165</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-asr-domain-adaptation-using-self-and-semi-supervised-learning-2110.00165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-asr-domain-adaptation-using-self-and-semi-supervised-learning-2110.00165"/></url>
<url><loc>https://scifaro.com/en/abs/salsa-spatial-cue-augmented-log-spectrogram-features-for-polyphonic-sound-event-localization-and-detection-2110.00275</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salsa-spatial-cue-augmented-log-spectrogram-features-for-polyphonic-sound-event-localization-and-detection-2110.00275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salsa-spatial-cue-augmented-log-spectrogram-features-for-polyphonic-sound-event-localization-and-detection-2110.00275"/></url>
<url><loc>https://scifaro.com/en/abs/speech-technology-for-everyone-automatic-speech-recognition-for-non-native-english-with-transfer-learning-2110.00678</loc><lastmod>2021-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-technology-for-everyone-automatic-speech-recognition-for-non-native-english-with-transfer-learning-2110.00678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-technology-for-everyone-automatic-speech-recognition-for-non-native-english-with-transfer-learning-2110.00678"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-complex-valued-multidilated-convolutional-neural-network-for-joint-acoustic-echo-cancellation-and-noise-suppression-2110.00745</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-complex-valued-multidilated-convolutional-neural-network-for-joint-acoustic-echo-cancellation-and-noise-suppression-2110.00745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-complex-valued-multidilated-convolutional-neural-network-for-joint-acoustic-echo-cancellation-and-noise-suppression-2110.00745"/></url>
<url><loc>https://scifaro.com/en/abs/significance-of-data-augmentation-for-improving-cleft-lip-and-palate-speech-recognition-2110.00797</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/significance-of-data-augmentation-for-improving-cleft-lip-and-palate-speech-recognition-2110.00797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/significance-of-data-augmentation-for-improving-cleft-lip-and-palate-speech-recognition-2110.00797"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-voice-activated-framework-using-self-supervised-learning-2110.01077</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-voice-activated-framework-using-self-supervised-learning-2110.01077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-voice-activated-framework-using-self-supervised-learning-2110.01077"/></url>
<url><loc>https://scifaro.com/en/abs/decoupling-speaker-independent-emotions-for-voice-conversion-via-source-filter-networks-2110.01164</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupling-speaker-independent-emotions-for-voice-conversion-via-source-filter-networks-2110.01164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupling-speaker-independent-emotions-for-voice-conversion-via-source-filter-networks-2110.01164"/></url>
<url><loc>https://scifaro.com/en/abs/the-second-dicova-challenge-dataset-and-performance-analysis-for-covid-19-diagnosis-using-acoustics-2110.01177</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-second-dicova-challenge-dataset-and-performance-analysis-for-covid-19-diagnosis-using-acoustics-2110.01177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-second-dicova-challenge-dataset-and-performance-analysis-for-covid-19-diagnosis-using-acoustics-2110.01177"/></url>
<url><loc>https://scifaro.com/en/abs/aasist-audio-anti-spoofing-using-integrated-spectro-temporal-graph-attention-networks-2110.01200</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aasist-audio-anti-spoofing-using-integrated-spectro-temporal-graph-attention-networks-2110.01200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aasist-audio-anti-spoofing-using-integrated-spectro-temporal-graph-attention-networks-2110.01200"/></url>
<url><loc>https://scifaro.com/en/abs/individualized-sound-pressure-equalization-in-hearing-devices-exploiting-an-electro-acoustic-model-2110.01422</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/individualized-sound-pressure-equalization-in-hearing-devices-exploiting-an-electro-acoustic-model-2110.01422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/individualized-sound-pressure-equalization-in-hearing-devices-exploiting-an-electro-acoustic-model-2110.01422"/></url>
<url><loc>https://scifaro.com/en/abs/wavebeat-end-to-end-beat-and-downbeat-tracking-in-the-time-domain-2110.01436</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavebeat-end-to-end-beat-and-downbeat-tracking-in-the-time-domain-2110.01436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavebeat-end-to-end-beat-and-downbeat-tracking-in-the-time-domain-2110.01436"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-pre-trained-asr-models-for-alzheimer-s-disease-recognition-through-spontaneous-speech-2110.01493</loc><lastmod>2021-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-pre-trained-asr-models-for-alzheimer-s-disease-recognition-through-spontaneous-speech-2110.01493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-pre-trained-asr-models-for-alzheimer-s-disease-recognition-through-spontaneous-speech-2110.01493"/></url>
<url><loc>https://scifaro.com/en/abs/dnsmos-p-835-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2110.01763</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnsmos-p-835-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2110.01763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnsmos-p-835-a-non-intrusive-perceptual-objective-speech-quality-metric-to-evaluate-noise-suppressors-2110.01763"/></url>
<url><loc>https://scifaro.com/en/abs/deep-optimization-of-parametric-iir-filters-for-audio-equalization-2110.02077</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-optimization-of-parametric-iir-filters-for-audio-equalization-2110.02077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-optimization-of-parametric-iir-filters-for-audio-equalization-2110.02077"/></url>
<url><loc>https://scifaro.com/en/abs/late-reverberation-suppression-using-u-nets-2110.02144</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/late-reverberation-suppression-using-u-nets-2110.02144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/late-reverberation-suppression-using-u-nets-2110.02144"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-blue-whale-vocalisations-using-a-temporal-domain-convolutional-neural-network-2110.02151</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-blue-whale-vocalisations-using-a-temporal-domain-convolutional-neural-network-2110.02151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-blue-whale-vocalisations-using-a-temporal-domain-convolutional-neural-network-2110.02151"/></url>
<url><loc>https://scifaro.com/en/abs/manifold-learning-supported-estimation-of-relative-transfer-functions-for-spatial-filtering-2110.02189</loc><lastmod>2021-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manifold-learning-supported-estimation-of-relative-transfer-functions-for-spatial-filtering-2110.02189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manifold-learning-supported-estimation-of-relative-transfer-functions-for-spatial-filtering-2110.02189"/></url>
<url><loc>https://scifaro.com/en/abs/fast-contextual-adaptation-with-neural-associative-memory-for-on-device-personalized-speech-recognition-2110.02220</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-contextual-adaptation-with-neural-associative-memory-for-on-device-personalized-speech-recognition-2110.02220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-contextual-adaptation-with-neural-associative-memory-for-on-device-personalized-speech-recognition-2110.02220"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-of-the-fender-bassman-5f6-a-tone-stack-2110.02285</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-of-the-fender-bassman-5f6-a-tone-stack-2110.02285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-of-the-fender-bassman-5f6-a-tone-stack-2110.02285"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-segmentation-and-variable-rate-representation-learning-using-segmental-contrastive-predictive-coding-2110.02345</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-segmentation-and-variable-rate-representation-learning-using-segmental-contrastive-predictive-coding-2110.02345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-segmentation-and-variable-rate-representation-learning-using-segmental-contrastive-predictive-coding-2110.02345"/></url>
<url><loc>https://scifaro.com/en/abs/neural-pitch-shifting-and-time-stretching-with-controllable-lpcnet-2110.02360</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-pitch-shifting-and-time-stretching-with-controllable-lpcnet-2110.02360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-pitch-shifting-and-time-stretching-with-controllable-lpcnet-2110.02360"/></url>
<url><loc>https://scifaro.com/en/abs/mediumvc-any-to-any-voice-conversion-using-synthetic-specific-speaker-speeches-as-intermedium-features-2110.02500</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mediumvc-any-to-any-voice-conversion-using-synthetic-specific-speaker-speeches-as-intermedium-features-2110.02500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mediumvc-any-to-any-voice-conversion-using-synthetic-specific-speaker-speeches-as-intermedium-features-2110.02500"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-recent-deep-learning-driven-singing-voice-synthesis-systems-2110.02511</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-recent-deep-learning-driven-singing-voice-synthesis-systems-2110.02511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-recent-deep-learning-driven-singing-voice-synthesis-systems-2110.02511"/></url>
<url><loc>https://scifaro.com/en/abs/improving-real-time-score-following-in-opera-by-combining-music-with-lyrics-tracking-2110.02592</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-real-time-score-following-in-opera-by-combining-music-with-lyrics-tracking-2110.02592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-real-time-score-following-in-opera-by-combining-music-with-lyrics-tracking-2110.02592"/></url>
<url><loc>https://scifaro.com/en/abs/generalization-ability-of-mos-prediction-networks-2110.02635</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalization-ability-of-mos-prediction-networks-2110.02635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalization-ability-of-mos-prediction-networks-2110.02635"/></url>
<url><loc>https://scifaro.com/en/abs/lower-interaural-coherence-in-off-signal-bands-impairs-binaural-detection-2110.02695</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lower-interaural-coherence-in-off-signal-bands-impairs-binaural-detection-2110.02695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lower-interaural-coherence-in-off-signal-bands-impairs-binaural-detection-2110.02695"/></url>
<url><loc>https://scifaro.com/en/abs/towards-efficient-end-to-end-speech-recognition-with-biologically-inspired-neural-networks-2110.02743</loc><lastmod>2021-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-efficient-end-to-end-speech-recognition-with-biologically-inspired-neural-networks-2110.02743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-efficient-end-to-end-speech-recognition-with-biologically-inspired-neural-networks-2110.02743"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-tts-an-end-to-end-speech-synthesis-system-with-prosody-control-2110.02854</loc><lastmod>2021-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-tts-an-end-to-end-speech-synthesis-system-with-prosody-control-2110.02854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-tts-an-end-to-end-speech-synthesis-system-with-prosody-control-2110.02854"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-audio-caption-aligning-learns-correspondences-between-individual-sound-events-and-textual-phrases-2110.02939</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-audio-caption-aligning-learns-correspondences-between-individual-sound-events-and-textual-phrases-2110.02939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-audio-caption-aligning-learns-correspondences-between-individual-sound-events-and-textual-phrases-2110.02939"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-prosody-modeling-and-control-in-non-autoregressive-parallel-neural-tts-2110.02952</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-prosody-modeling-and-control-in-non-autoregressive-parallel-neural-tts-2110.02952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-prosody-modeling-and-control-in-non-autoregressive-parallel-neural-tts-2110.02952"/></url>
<url><loc>https://scifaro.com/en/abs/aecmos-a-speech-quality-assessment-metric-for-echo-impairment-2110.03010</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aecmos-a-speech-quality-assessment-metric-for-echo-impairment-2110.03010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aecmos-a-speech-quality-assessment-metric-for-echo-impairment-2110.03010"/></url>
<url><loc>https://scifaro.com/en/abs/emphasis-control-for-parallel-neural-tts-2110.03012</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emphasis-control-for-parallel-neural-tts-2110.03012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emphasis-control-for-parallel-neural-tts-2110.03012"/></url>
<url><loc>https://scifaro.com/en/abs/ctc-variations-through-new-wfst-topologies-2110.03098</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctc-variations-through-new-wfst-topologies-2110.03098"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctc-variations-through-new-wfst-topologies-2110.03098"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-speech-enhancement-in-unseen-noisy-and-reverberant-conditions-using-kiss-gev-beamforming-2110.03103</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-speech-enhancement-in-unseen-noisy-and-reverberant-conditions-using-kiss-gev-beamforming-2110.03103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-speech-enhancement-in-unseen-noisy-and-reverberant-conditions-using-kiss-gev-beamforming-2110.03103"/></url>
<url><loc>https://scifaro.com/en/abs/on-audio-enhancement-via-online-non-negative-matrix-factorization-2110.03114</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-audio-enhancement-via-online-non-negative-matrix-factorization-2110.03114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-audio-enhancement-via-online-non-negative-matrix-factorization-2110.03114"/></url>
<url><loc>https://scifaro.com/en/abs/transcribe-to-diarize-neural-speaker-diarization-for-unlimited-number-of-speakers-using-end-to-end-speaker-attributed-asr-2110.03151</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcribe-to-diarize-neural-speaker-diarization-for-unlimited-number-of-speakers-using-end-to-end-speaker-attributed-asr-2110.03151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcribe-to-diarize-neural-speaker-diarization-for-unlimited-number-of-speakers-using-end-to-end-speaker-attributed-asr-2110.03151"/></url>
<url><loc>https://scifaro.com/en/abs/improving-bird-classification-with-unsupervised-sound-separation-2110.03209</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-bird-classification-with-unsupervised-sound-separation-2110.03209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-bird-classification-with-unsupervised-sound-separation-2110.03209"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-dynamic-convolutional-neural-network-for-text-independent-speaker-verification-and-phonemetic-analysis-2110.03213</loc><lastmod>2022-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-dynamic-convolutional-neural-network-for-text-independent-speaker-verification-and-phonemetic-analysis-2110.03213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-dynamic-convolutional-neural-network-for-text-independent-speaker-verification-and-phonemetic-analysis-2110.03213"/></url>
<url><loc>https://scifaro.com/en/abs/filteraugment-an-acoustic-environmental-data-augmentation-method-2110.03282</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filteraugment-an-acoustic-environmental-data-augmentation-method-2110.03282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filteraugment-an-acoustic-environmental-data-augmentation-method-2110.03282"/></url>
<url><loc>https://scifaro.com/en/abs/experimental-investigation-on-stft-phase-representations-for-deep-learning-based-dysarthric-speech-detection-2110.03283</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experimental-investigation-on-stft-phase-representations-for-deep-learning-based-dysarthric-speech-detection-2110.03283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experimental-investigation-on-stft-phase-representations-for-deep-learning-based-dysarthric-speech-detection-2110.03283"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-for-speech-based-arousal-recognition-using-bayesian-neural-networks-2110.03299</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-for-speech-based-arousal-recognition-using-bayesian-neural-networks-2110.03299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-for-speech-based-arousal-recognition-using-bayesian-neural-networks-2110.03299"/></url>
<url><loc>https://scifaro.com/en/abs/explaining-deep-learning-models-for-spoofing-and-deepfake-detection-with-shapley-additive-explanations-2110.03309</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explaining-deep-learning-models-for-spoofing-and-deepfake-detection-with-shapley-additive-explanations-2110.03309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explaining-deep-learning-models-for-spoofing-and-deepfake-detection-with-shapley-additive-explanations-2110.03309"/></url>
<url><loc>https://scifaro.com/en/abs/improving-confidence-estimation-on-out-of-domain-data-for-end-to-end-speech-recognition-2110.03327</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-confidence-estimation-on-out-of-domain-data-for-end-to-end-speech-recognition-2110.03327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-confidence-estimation-on-out-of-domain-data-for-end-to-end-speech-recognition-2110.03327"/></url>
<url><loc>https://scifaro.com/en/abs/towards-universal-neural-vocoding-with-a-multi-band-excited-wavenet-2110.03329</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-universal-neural-vocoding-with-a-multi-band-excited-wavenet-2110.03329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-universal-neural-vocoding-with-a-multi-band-excited-wavenet-2110.03329"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducers-from-large-self-supervised-pre-trained-models-2110.03334</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducers-from-large-self-supervised-pre-trained-models-2110.03334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducers-from-large-self-supervised-pre-trained-models-2110.03334"/></url>
<url><loc>https://scifaro.com/en/abs/visualtts-tts-with-accurate-lip-speech-synchronization-for-automatic-voice-over-2110.03342</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualtts-tts-with-accurate-lip-speech-synchronization-for-automatic-voice-over-2110.03342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualtts-tts-with-accurate-lip-speech-synchronization-for-automatic-voice-over-2110.03342"/></url>
<url><loc>https://scifaro.com/en/abs/cloning-one-s-voice-using-very-limited-data-in-the-wild-2110.03347</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cloning-one-s-voice-using-very-limited-data-in-the-wild-2110.03347"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cloning-one-s-voice-using-very-limited-data-in-the-wild-2110.03347"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-speaker-embedding-based-graph-attention-networks-for-speaker-diarisation-2110.03361</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-speaker-embedding-based-graph-attention-networks-for-speaker-diarisation-2110.03361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-speaker-embedding-based-graph-attention-networks-for-speaker-diarisation-2110.03361"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-memory-network-the-novel-network-structure-for-symbolic-music-generation-2110.03392</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-memory-network-the-novel-network-structure-for-symbolic-music-generation-2110.03392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-memory-network-the-novel-network-structure-for-symbolic-music-generation-2110.03392"/></url>
<url><loc>https://scifaro.com/en/abs/light-sernet-a-lightweight-fully-convolutional-neural-network-for-speech-emotion-recognition-2110.03435</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/light-sernet-a-lightweight-fully-convolutional-neural-network-for-speech-emotion-recognition-2110.03435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/light-sernet-a-lightweight-fully-convolutional-neural-network-for-speech-emotion-recognition-2110.03435"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-the-robustness-of-unsupervised-speech-recognition-2110.03509</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-the-robustness-of-unsupervised-speech-recognition-2110.03509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-the-robustness-of-unsupervised-speech-recognition-2110.03509"/></url>
<url><loc>https://scifaro.com/en/abs/peer-collaborative-learning-for-polyphonic-sound-event-detection-2110.03511</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peer-collaborative-learning-for-polyphonic-sound-event-detection-2110.03511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peer-collaborative-learning-for-polyphonic-sound-event-detection-2110.03511"/></url>
<url><loc>https://scifaro.com/en/abs/accent-robust-automatic-speech-recognition-using-supervised-and-unsupervised-wav2vec-embeddings-2110.03520</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-robust-automatic-speech-recognition-using-supervised-and-unsupervised-wav2vec-embeddings-2110.03520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-robust-automatic-speech-recognition-using-supervised-and-unsupervised-wav2vec-embeddings-2110.03520"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-word-error-training-for-non-autoregressive-transformer-based-code-switching-asr-2110.03573</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-word-error-training-for-non-autoregressive-transformer-based-code-switching-asr-2110.03573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-word-error-training-for-non-autoregressive-transformer-based-code-switching-asr-2110.03573"/></url>
<url><loc>https://scifaro.com/en/abs/mixer-tts-non-autoregressive-fast-and-compact-text-to-speech-model-conditioned-on-language-model-embeddings-2110.03584</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixer-tts-non-autoregressive-fast-and-compact-text-to-speech-model-conditioned-on-language-model-embeddings-2110.03584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixer-tts-non-autoregressive-fast-and-compact-text-to-speech-model-conditioned-on-language-model-embeddings-2110.03584"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-a-fixed-virtual-sensing-feedback-anc-controller-for-in-ear-headphones-with-multiple-loudspeakers-2110.03586</loc><lastmod>2021-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-a-fixed-virtual-sensing-feedback-anc-controller-for-in-ear-headphones-with-multiple-loudspeakers-2110.03586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-a-fixed-virtual-sensing-feedback-anc-controller-for-in-ear-headphones-with-multiple-loudspeakers-2110.03586"/></url>
<url><loc>https://scifaro.com/en/abs/towards-faster-continuous-multi-channel-hrtf-measurements-based-on-learning-system-models-2110.03630</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-faster-continuous-multi-channel-hrtf-measurements-based-on-learning-system-models-2110.03630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-faster-continuous-multi-channel-hrtf-measurements-based-on-learning-system-models-2110.03630"/></url>
<url><loc>https://scifaro.com/en/abs/peaf-learnable-power-efficient-analog-acoustic-features-for-audio-recognition-2110.03715</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peaf-learnable-power-efficient-analog-acoustic-features-for-audio-recognition-2110.03715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peaf-learnable-power-efficient-analog-acoustic-features-for-audio-recognition-2110.03715"/></url>
<url><loc>https://scifaro.com/en/abs/input-length-matters-improving-rnn-t-and-mwer-training-for-long-form-telephony-speech-recognition-2110.03841</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/input-length-matters-improving-rnn-t-and-mwer-training-for-long-form-telephony-speech-recognition-2110.03841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/input-length-matters-improving-rnn-t-and-mwer-training-for-long-form-telephony-speech-recognition-2110.03841"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-the-efficacy-of-model-pre-training-in-developing-neural-text-to-speech-system-2110.03857</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-the-efficacy-of-model-pre-training-in-developing-neural-text-to-speech-system-2110.03857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-the-efficacy-of-model-pre-training-in-developing-neural-text-to-speech-system-2110.03857"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speaker-recognition-with-loss-gated-learning-2110.03869</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speaker-recognition-with-loss-gated-learning-2110.03869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speaker-recognition-with-loss-gated-learning-2110.03869"/></url>
<url><loc>https://scifaro.com/en/abs/environment-aware-text-to-speech-synthesis-2110.03887</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environment-aware-text-to-speech-synthesis-2110.03887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environment-aware-text-to-speech-synthesis-2110.03887"/></url>
<url><loc>https://scifaro.com/en/abs/neural-model-reprogramming-with-similarity-based-mapping-for-low-resource-spoken-command-recognition-2110.03894</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-model-reprogramming-with-similarity-based-mapping-for-low-resource-spoken-command-recognition-2110.03894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-model-reprogramming-with-similarity-based-mapping-for-low-resource-spoken-command-recognition-2110.03894"/></url>
<url><loc>https://scifaro.com/en/abs/joint-scattering-for-automatic-chick-call-recognition-2110.03965</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-scattering-for-automatic-chick-call-recognition-2110.03965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-scattering-for-automatic-chick-call-recognition-2110.03965"/></url>
<url><loc>https://scifaro.com/en/abs/karasinger-score-free-singing-voice-synthesis-with-vq-vae-using-mel-spectrograms-2110.04005</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/karasinger-score-free-singing-voice-synthesis-with-vq-vae-using-mel-spectrograms-2110.04005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/karasinger-score-free-singing-voice-synthesis-with-vq-vae-using-mel-spectrograms-2110.04005"/></url>
<url><loc>https://scifaro.com/en/abs/trunet-transformer-recurrent-u-network-for-multi-channel-reverberant-sound-source-separation-2110.04047</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trunet-transformer-recurrent-u-network-for-multi-channel-reverberant-sound-source-separation-2110.04047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trunet-transformer-recurrent-u-network-for-multi-channel-reverberant-sound-source-separation-2110.04047"/></url>
<url><loc>https://scifaro.com/en/abs/improving-pseudo-label-training-for-end-to-end-speech-recognition-using-gradient-mask-2110.04056</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-pseudo-label-training-for-end-to-end-speech-recognition-using-gradient-mask-2110.04056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-pseudo-label-training-for-end-to-end-speech-recognition-using-gradient-mask-2110.04056"/></url>
<url><loc>https://scifaro.com/en/abs/a-method-for-capturing-and-reproducing-directional-reverberation-in-six-degrees-of-freedom-2110.04082</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-method-for-capturing-and-reproducing-directional-reverberation-in-six-degrees-of-freedom-2110.04082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-method-for-capturing-and-reproducing-directional-reverberation-in-six-degrees-of-freedom-2110.04082"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-conditional-end-to-end-asr-with-ctc-and-multi-granular-subword-units-2110.04109</loc><lastmod>2022-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-conditional-end-to-end-asr-with-ctc-and-multi-granular-subword-units-2110.04109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-conditional-end-to-end-asr-with-ctc-and-multi-granular-subword-units-2110.04109"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-speaker-condition-layer-normalization-and-semi-supervised-training-in-text-to-speech-2110.04153</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-speaker-condition-layer-normalization-and-semi-supervised-training-in-text-to-speech-2110.04153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-based-on-speaker-condition-layer-normalization-and-semi-supervised-training-in-text-to-speech-2110.04153"/></url>
<url><loc>https://scifaro.com/en/abs/scala-supervised-contrastive-learning-for-end-to-end-speech-recognition-2110.04187</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scala-supervised-contrastive-learning-for-end-to-end-speech-recognition-2110.04187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scala-supervised-contrastive-learning-for-end-to-end-speech-recognition-2110.04187"/></url>
<url><loc>https://scifaro.com/en/abs/cognitive-coding-of-speech-2110.04241</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cognitive-coding-of-speech-2110.04241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cognitive-coding-of-speech-2110.04241"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-the-robustness-of-raw-waveform-based-speaker-embeddings-under-mismatched-conditions-2110.04265</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-the-robustness-of-raw-waveform-based-speaker-embeddings-under-mismatched-conditions-2110.04265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-the-robustness-of-raw-waveform-based-speaker-embeddings-under-mismatched-conditions-2110.04265"/></url>
<url><loc>https://scifaro.com/en/abs/location-based-training-for-multi-channel-talker-independent-speaker-separation-2110.04289</loc><lastmod>2021-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/location-based-training-for-multi-channel-talker-independent-speaker-separation-2110.04289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/location-based-training-for-multi-channel-talker-independent-speaker-separation-2110.04289"/></url>
<url><loc>https://scifaro.com/en/abs/musicnet-compact-convolutional-neural-network-for-real-time-background-music-detection-2110.04331</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musicnet-compact-convolutional-neural-network-for-real-time-background-music-detection-2110.04331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musicnet-compact-convolutional-neural-network-for-real-time-background-music-detection-2110.04331"/></url>
<url><loc>https://scifaro.com/en/abs/performance-optimizations-on-deep-noise-suppression-models-2110.04378</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-optimizations-on-deep-noise-suppression-models-2110.04378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-optimizations-on-deep-noise-suppression-models-2110.04378"/></url>
<url><loc>https://scifaro.com/en/abs/individualized-hear-through-for-acoustic-transparency-using-pca-based-sound-pressure-estimation-at-the-eardrum-2110.04385</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/individualized-hear-through-for-acoustic-transparency-using-pca-based-sound-pressure-estimation-at-the-eardrum-2110.04385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/individualized-hear-through-for-acoustic-transparency-using-pca-based-sound-pressure-estimation-at-the-eardrum-2110.04385"/></url>
<url><loc>https://scifaro.com/en/abs/aura-privacy-preserving-augmentation-to-improve-test-set-diversity-in-speech-enhancement-2110.04391</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aura-privacy-preserving-augmentation-to-improve-test-set-diversity-in-speech-enhancement-2110.04391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aura-privacy-preserving-augmentation-to-improve-test-set-diversity-in-speech-enhancement-2110.04391"/></url>
<url><loc>https://scifaro.com/en/abs/titanet-neural-model-for-speaker-representation-with-1d-depth-wise-separable-convolutions-and-global-context-2110.04410</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/titanet-neural-model-for-speaker-representation-with-1d-depth-wise-separable-convolutions-and-global-context-2110.04410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/titanet-neural-model-for-speaker-representation-with-1d-depth-wise-separable-convolutions-and-global-context-2110.04410"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-approach-for-assessing-neuromotor-coordination-in-schizophrenia-using-convolutional-neural-networks-2110.04440</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-approach-for-assessing-neuromotor-coordination-in-schizophrenia-using-convolutional-neural-networks-2110.04440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-approach-for-assessing-neuromotor-coordination-in-schizophrenia-using-convolutional-neural-networks-2110.04440"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lifelong-learning-of-multilingual-text-to-speech-synthesis-2110.04482</loc><lastmod>2022-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lifelong-learning-of-multilingual-text-to-speech-synthesis-2110.04482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lifelong-learning-of-multilingual-text-to-speech-synthesis-2110.04482"/></url>
<url><loc>https://scifaro.com/en/abs/wav2vec-s-semi-supervised-pre-training-for-low-resource-asr-2110.04484</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2vec-s-semi-supervised-pre-training-for-low-resource-asr-2110.04484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2vec-s-semi-supervised-pre-training-for-low-resource-asr-2110.04484"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-with-locally-time-reversed-speech-for-automatic-speech-recognition-2110.04511</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-with-locally-time-reversed-speech-for-automatic-speech-recognition-2110.04511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-with-locally-time-reversed-speech-for-automatic-speech-recognition-2110.04511"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-network-for-semantically-aware-and-speech-driven-upper-face-generation-2110.04527</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-network-for-semantically-aware-and-speech-driven-upper-face-generation-2110.04527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-network-for-semantically-aware-and-speech-driven-upper-face-generation-2110.04527"/></url>
<url><loc>https://scifaro.com/en/abs/visually-exploring-multi-purpose-audio-data-2110.04584</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-exploring-multi-purpose-audio-data-2110.04584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-exploring-multi-purpose-audio-data-2110.04584"/></url>
<url><loc>https://scifaro.com/en/abs/an-evaluation-of-data-augmentation-methods-for-sound-scene-geotagging-2110.04585</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-evaluation-of-data-augmentation-methods-for-sound-scene-geotagging-2110.04585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-evaluation-of-data-augmentation-methods-for-sound-scene-geotagging-2110.04585"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-automatic-speech-recognition-trained-on-small-disordered-speech-datasets-2110.04612</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-automatic-speech-recognition-trained-on-small-disordered-speech-datasets-2110.04612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-automatic-speech-recognition-trained-on-small-disordered-speech-datasets-2110.04612"/></url>
<url><loc>https://scifaro.com/en/abs/complex-network-based-approach-for-feature-extraction-and-classification-of-musical-genres-2110.04654</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-network-based-approach-for-feature-extraction-and-classification-of-musical-genres-2110.04654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-network-based-approach-for-feature-extraction-and-classification-of-musical-genres-2110.04654"/></url>
<url><loc>https://scifaro.com/en/abs/poformer-a-simple-pooling-transformer-for-speaker-verification-2110.04692</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/poformer-a-simple-pooling-transformer-for-speaker-verification-2110.04692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/poformer-a-simple-pooling-transformer-for-speaker-verification-2110.04692"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-end-to-end-neural-diarization-with-distributed-microphones-2110.04694</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-end-to-end-neural-diarization-with-distributed-microphones-2110.04694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-end-to-end-neural-diarization-with-distributed-microphones-2110.04694"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-the-confidence-of-speech-spoofing-countermeasure-2110.04775</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-the-confidence-of-speech-spoofing-countermeasure-2110.04775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-the-confidence-of-speech-spoofing-countermeasure-2110.04775"/></url>
<url><loc>https://scifaro.com/en/abs/stepwise-refining-speech-separation-network-via-fine-grained-encoding-in-high-order-latent-domain-2110.04791</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stepwise-refining-speech-separation-network-via-fine-grained-encoding-in-high-order-latent-domain-2110.04791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stepwise-refining-speech-separation-network-via-fine-grained-encoding-in-high-order-latent-domain-2110.04791"/></url>
<url><loc>https://scifaro.com/en/abs/direct-source-and-early-reflections-localization-using-deep-deconvolution-network-under-reverberant-environment-2110.04850</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direct-source-and-early-reflections-localization-using-deep-deconvolution-network-under-reverberant-environment-2110.04850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direct-source-and-early-reflections-localization-using-deep-deconvolution-network-under-reverberant-environment-2110.04850"/></url>
<url><loc>https://scifaro.com/en/abs/injecting-text-and-cross-lingual-supervision-in-few-shot-learning-from-self-supervised-models-2110.04863</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/injecting-text-and-cross-lingual-supervision-in-few-shot-learning-from-self-supervised-models-2110.04863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/injecting-text-and-cross-lingual-supervision-in-few-shot-learning-from-self-supervised-models-2110.04863"/></url>
<url><loc>https://scifaro.com/en/abs/ditto-data-efficient-and-fair-targeted-subset-selection-for-asr-accent-adaptation-2110.04908</loc><lastmod>2024-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditto-data-efficient-and-fair-targeted-subset-selection-for-asr-accent-adaptation-2110.04908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditto-data-efficient-and-fair-targeted-subset-selection-for-asr-accent-adaptation-2110.04908"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-momentum-pseudo-labeling-with-conformer-and-initialization-strategy-2110.04948</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-momentum-pseudo-labeling-with-conformer-and-initialization-strategy-2110.04948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-momentum-pseudo-labeling-with-conformer-and-initialization-strategy-2110.04948"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-self-attention-based-transformer-for-speaker-recognition-2110.05036</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-self-attention-based-transformer-for-speaker-recognition-2110.05036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-self-attention-based-transformer-for-speaker-recognition-2110.05036"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-transformer-transducer-based-speech-recognition-using-non-causal-convolution-2110.05241</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-transformer-transducer-based-speech-recognition-using-non-causal-convolution-2110.05241"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-transformer-transducer-based-speech-recognition-using-non-causal-convolution-2110.05241"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-non-autoregressive-modelings-for-speech-to-text-generation-2110.05249</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-non-autoregressive-modelings-for-speech-to-text-generation-2110.05249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-non-autoregressive-modelings-for-speech-to-text-generation-2110.05249"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-feature-fusion-for-end-to-end-noise-robust-speech-recognition-2110.05267</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-feature-fusion-for-end-to-end-noise-robust-speech-recognition-2110.05267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-feature-fusion-for-end-to-end-noise-robust-speech-recognition-2110.05267"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-invertibility-of-a-voice-privacy-system-using-embedding-alignement-2110.05431</loc><lastmod>2021-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-invertibility-of-a-voice-privacy-system-using-embedding-alignement-2110.05431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-invertibility-of-a-voice-privacy-system-using-embedding-alignement-2110.05431"/></url>
<url><loc>https://scifaro.com/en/abs/sru-pioneering-fast-recurrence-with-attention-for-speech-recognition-2110.05571</loc><lastmod>2021-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sru-pioneering-fast-recurrence-with-attention-for-speech-recognition-2110.05571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sru-pioneering-fast-recurrence-with-attention-for-speech-recognition-2110.05571"/></url>
<url><loc>https://scifaro.com/en/abs/deepfilternet-a-low-complexity-speech-enhancement-framework-for-full-band-audio-based-on-deep-filtering-2110.05588</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfilternet-a-low-complexity-speech-enhancement-framework-for-full-band-audio-based-on-deep-filtering-2110.05588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfilternet-a-low-complexity-speech-enhancement-framework-for-full-band-audio-based-on-deep-filtering-2110.05588"/></url>
<url><loc>https://scifaro.com/en/abs/the-mirrornet-learning-audio-synthesizer-controls-inspired-by-sensorimotor-interaction-2110.05695</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-mirrornet-learning-audio-synthesizer-controls-inspired-by-sensorimotor-interaction-2110.05695"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-mirrornet-learning-audio-synthesizer-controls-inspired-by-sensorimotor-interaction-2110.05695"/></url>
<url><loc>https://scifaro.com/en/abs/vararray-array-geometry-agnostic-continuous-speech-separation-2110.05745</loc><lastmod>2021-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vararray-array-geometry-agnostic-continuous-speech-separation-2110.05745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vararray-array-geometry-agnostic-continuous-speech-separation-2110.05745"/></url>
<url><loc>https://scifaro.com/en/abs/bertraffic-bert-based-joint-speaker-role-and-speaker-change-detection-for-air-traffic-control-communications-2110.05781</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bertraffic-bert-based-joint-speaker-role-and-speaker-change-detection-for-air-traffic-control-communications-2110.05781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bertraffic-bert-based-joint-speaker-role-and-speaker-change-detection-for-air-traffic-control-communications-2110.05781"/></url>
<url><loc>https://scifaro.com/en/abs/improving-character-error-rate-is-not-equal-to-having-clean-speech-speech-enhancement-for-asr-systems-with-black-box-acoustic-models-2110.05968</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-character-error-rate-is-not-equal-to-having-clean-speech-speech-enhancement-for-asr-systems-with-black-box-acoustic-models-2110.05968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-character-error-rate-is-not-equal-to-having-clean-speech-speech-enhancement-for-asr-systems-with-black-box-acoustic-models-2110.05968"/></url>
<url><loc>https://scifaro.com/en/abs/word-order-does-not-matter-for-speech-recognition-2110.05994</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/word-order-does-not-matter-for-speech-recognition-2110.05994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/word-order-does-not-matter-for-speech-recognition-2110.05994"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-mixup-directional-loudness-modification-as-data-augmentation-for-sound-event-localization-and-detection-2110.06126</loc><lastmod>2022-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-mixup-directional-loudness-modification-as-data-augmentation-for-sound-event-localization-and-detection-2110.06126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-mixup-directional-loudness-modification-as-data-augmentation-for-sound-event-localization-and-detection-2110.06126"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-time-domain-velocity-vector-2110.06304</loc><lastmod>2022-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-time-domain-velocity-vector-2110.06304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-time-domain-velocity-vector-2110.06304"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-style-control-in-transformer-based-text-to-speech-synthesis-2110.06306</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-style-control-in-transformer-based-text-to-speech-synthesis-2110.06306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-style-control-in-transformer-based-text-to-speech-synthesis-2110.06306"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-wav2vec-2-0-fine-tuning-for-improved-speech-emotion-recognition-2110.06309</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-wav2vec-2-0-fine-tuning-for-improved-speech-emotion-recognition-2110.06309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-wav2vec-2-0-fine-tuning-for-improved-speech-emotion-recognition-2110.06309"/></url>
<url><loc>https://scifaro.com/en/abs/all-neural-beamformer-for-continuous-speech-separation-2110.06428</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-neural-beamformer-for-continuous-speech-separation-2110.06428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-neural-beamformer-for-continuous-speech-separation-2110.06428"/></url>
<url><loc>https://scifaro.com/en/abs/deepa-a-deep-neural-analyzer-for-speech-and-singing-vocoding-2110.06434</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepa-a-deep-neural-analyzer-for-speech-and-singing-vocoding-2110.06434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepa-a-deep-neural-analyzer-for-speech-and-singing-vocoding-2110.06434"/></url>
<url><loc>https://scifaro.com/en/abs/sdr-medium-rare-with-fast-computations-2110.06440</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sdr-medium-rare-with-fast-computations-2110.06440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sdr-medium-rare-with-fast-computations-2110.06440"/></url>
<url><loc>https://scifaro.com/en/abs/independence-based-joint-dereverberation-and-separation-with-neural-source-model-2110.06545</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independence-based-joint-dereverberation-and-separation-with-neural-source-model-2110.06545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independence-based-joint-dereverberation-and-separation-with-neural-source-model-2110.06545"/></url>
<url><loc>https://scifaro.com/en/abs/a-melody-unsupervision-model-for-singing-voice-synthesis-2110.06546</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-melody-unsupervision-model-for-singing-voice-synthesis-2110.06546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-melody-unsupervision-model-for-singing-voice-synthesis-2110.06546"/></url>
<url><loc>https://scifaro.com/en/abs/diverse-audio-captioning-via-adversarial-training-2110.06691</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diverse-audio-captioning-via-adversarial-training-2110.06691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diverse-audio-captioning-via-adversarial-training-2110.06691"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-speech-content-privacy-2110.06760</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-speech-content-privacy-2110.06760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-speech-content-privacy-2110.06760"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-importance-of-f0-trajectories-for-speaker-anonymization-using-x-vectors-and-neural-waveform-models-2110.06887</loc><lastmod>2021-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-importance-of-f0-trajectories-for-speaker-anonymization-using-x-vectors-and-neural-waveform-models-2110.06887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-importance-of-f0-trajectories-for-speaker-anonymization-using-x-vectors-and-neural-waveform-models-2110.06887"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-using-lattice-free-mmi-for-speech-recognition-2110.07055</loc><lastmod>2021-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-using-lattice-free-mmi-for-speech-recognition-2110.07055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-using-lattice-free-mmi-for-speech-recognition-2110.07055"/></url>
<url><loc>https://scifaro.com/en/abs/auxiliary-loss-of-transformer-with-residual-connection-for-end-to-end-speaker-diarization-2110.07116</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auxiliary-loss-of-transformer-with-residual-connection-for-end-to-end-speaker-diarization-2110.07116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auxiliary-loss-of-transformer-with-residual-connection-for-end-to-end-speaker-diarization-2110.07116"/></url>
<url><loc>https://scifaro.com/en/abs/multi-accdoa-localizing-and-detecting-overlapping-sounds-from-the-same-class-with-auxiliary-duplicating-permutation-invariant-training-2110.07124</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-accdoa-localizing-and-detecting-overlapping-sounds-from-the-same-class-with-auxiliary-duplicating-permutation-invariant-training-2110.07124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-accdoa-localizing-and-detecting-overlapping-sounds-from-the-same-class-with-auxiliary-duplicating-permutation-invariant-training-2110.07124"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-timbre-disentanglement-in-non-autoregressive-cross-lingual-text-to-speech-2110.07192</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-timbre-disentanglement-in-non-autoregressive-cross-lingual-text-to-speech-2110.07192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-timbre-disentanglement-in-non-autoregressive-cross-lingual-text-to-speech-2110.07192"/></url>
<url><loc>https://scifaro.com/en/abs/speecht5-unified-modal-encoder-decoder-pre-training-for-spoken-language-processing-2110.07205</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speecht5-unified-modal-encoder-decoder-pre-training-for-spoken-language-processing-2110.07205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speecht5-unified-modal-encoder-decoder-pre-training-for-spoken-language-processing-2110.07205"/></url>
<url><loc>https://scifaro.com/en/abs/fedspeech-federated-text-to-speech-with-continual-learning-2110.07216</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fedspeech-federated-text-to-speech-with-continual-learning-2110.07216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fedspeech-federated-text-to-speech-with-continual-learning-2110.07216"/></url>
<url><loc>https://scifaro.com/en/abs/student-t-networks-for-melody-estimation-2110.07419</loc><lastmod>2021-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/student-t-networks-for-melody-estimation-2110.07419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/student-t-networks-for-melody-estimation-2110.07419"/></url>
<url><loc>https://scifaro.com/en/abs/singgan-generative-adversarial-network-for-high-fidelity-singing-voice-generation-2110.07468</loc><lastmod>2022-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singgan-generative-adversarial-network-for-high-fidelity-singing-voice-generation-2110.07468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singgan-generative-adversarial-network-for-high-fidelity-singing-voice-generation-2110.07468"/></url>
<url><loc>https://scifaro.com/en/abs/toward-degradation-robust-voice-conversion-2110.07537</loc><lastmod>2022-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-degradation-robust-voice-conversion-2110.07537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-degradation-robust-voice-conversion-2110.07537"/></url>
<url><loc>https://scifaro.com/en/abs/don-t-speak-too-fast-the-impact-of-data-bias-on-self-supervised-speech-models-2110.07957</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/don-t-speak-too-fast-the-impact-of-data-bias-on-self-supervised-speech-models-2110.07957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/don-t-speak-too-fast-the-impact-of-data-bias-on-self-supervised-speech-models-2110.07957"/></url>
<url><loc>https://scifaro.com/en/abs/neural-dubber-dubbing-for-videos-according-to-scripts-2110.08243</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-dubber-dubbing-for-videos-according-to-scripts-2110.08243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-dubber-dubbing-for-videos-according-to-scripts-2110.08243"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-speaker-adaptation-approach-for-asr-2110.08545</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-speaker-adaptation-approach-for-asr-2110.08545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-speaker-adaptation-approach-for-asr-2110.08545"/></url>
<url><loc>https://scifaro.com/en/abs/asr4real-an-extended-benchmark-for-speech-models-2110.08583</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr4real-an-extended-benchmark-for-speech-models-2110.08583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr4real-an-extended-benchmark-for-speech-models-2110.08583"/></url>
<url><loc>https://scifaro.com/en/abs/a-variational-bayesian-approach-to-learning-latent-variables-for-acoustic-knowledge-transfer-2110.08598</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variational-bayesian-approach-to-learning-latent-variables-for-acoustic-knowledge-transfer-2110.08598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variational-bayesian-approach-to-learning-latent-variables-for-acoustic-knowledge-transfer-2110.08598"/></url>
<url><loc>https://scifaro.com/en/abs/visinger-variational-inference-with-adversarial-learning-for-end-to-end-singing-voice-synthesis-2110.08813</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visinger-variational-inference-with-adversarial-learning-for-end-to-end-singing-voice-synthesis-2110.08813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visinger-variational-inference-with-adversarial-learning-for-end-to-end-singing-voice-synthesis-2110.08813"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-edm-subgenre-classification-using-mel-spectrogram-and-tempogram-features-2110.08862</loc><lastmod>2021-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-edm-subgenre-classification-using-mel-spectrogram-and-tempogram-features-2110.08862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-edm-subgenre-classification-using-mel-spectrogram-and-tempogram-features-2110.08862"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-metric-learning-for-music-structure-features-2110.09000</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-metric-learning-for-music-structure-features-2110.09000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-metric-learning-for-music-structure-features-2110.09000"/></url>
<url><loc>https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-with-iterative-casting-and-boost-start-for-target-source-extraction-using-reference-2110.09019</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-with-iterative-casting-and-boost-start-for-target-source-extraction-using-reference-2110.09019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/similarity-and-independence-aware-beamformer-with-iterative-casting-and-boost-start-for-target-source-extraction-using-reference-2110.09019"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-the-score-shift-in-cross-lingual-speaker-verification-by-exploiting-language-information-2110.09150</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-the-score-shift-in-cross-lingual-speaker-verification-by-exploiting-language-information-2110.09150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-the-score-shift-in-cross-lingual-speaker-verification-by-exploiting-language-information-2110.09150"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-speech-enhancement-new-models-and-comprehensive-evaluation-2110.09625</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-speech-enhancement-new-models-and-comprehensive-evaluation-2110.09625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-speech-enhancement-new-models-and-comprehensive-evaluation-2110.09625"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-pre-training-for-automated-speech-recognition-2110.09890</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-pre-training-for-automated-speech-recognition-2110.09890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-pre-training-for-automated-speech-recognition-2110.09890"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-assisted-voice-conversion-in-noisy-environments-2110.09923</loc><lastmod>2023-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-assisted-voice-conversion-in-noisy-environments-2110.09923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-assisted-voice-conversion-in-noisy-environments-2110.09923"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-based-on-cyclegan-with-noise-informed-training-2110.09924</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-based-on-cyclegan-with-noise-informed-training-2110.09924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-based-on-cyclegan-with-noise-informed-training-2110.09924"/></url>
<url><loc>https://scifaro.com/en/abs/cycleflow-purify-information-factors-by-cycle-loss-2110.09928</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cycleflow-purify-information-factors-by-cycle-loss-2110.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cycleflow-purify-information-factors-by-cycle-loss-2110.09928"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representation-learning-through-self-supervised-pretraining-and-multi-task-finetuning-2110.09930</loc><lastmod>2021-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representation-learning-through-self-supervised-pretraining-and-multi-task-finetuning-2110.09930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representation-learning-through-self-supervised-pretraining-and-multi-task-finetuning-2110.09930"/></url>
<url><loc>https://scifaro.com/en/abs/the-cocktail-fork-problem-three-stem-audio-separation-for-real-world-soundtracks-2110.09958</loc><lastmod>2022-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-cocktail-fork-problem-three-stem-audio-separation-for-real-world-soundtracks-2110.09958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-cocktail-fork-problem-three-stem-audio-separation-for-real-world-soundtracks-2110.09958"/></url>
<url><loc>https://scifaro.com/en/abs/private-language-model-adaptation-for-speech-recognition-2110.10026</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/private-language-model-adaptation-for-speech-recognition-2110.10026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/private-language-model-adaptation-for-speech-recognition-2110.10026"/></url>
<url><loc>https://scifaro.com/en/abs/chunked-autoregressive-gan-for-conditional-waveform-synthesis-2110.10139</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chunked-autoregressive-gan-for-conditional-waveform-synthesis-2110.10139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chunked-autoregressive-gan-for-conditional-waveform-synthesis-2110.10139"/></url>
<url><loc>https://scifaro.com/en/abs/disentanglement-of-emotional-style-and-speaker-identity-for-expressive-voice-conversion-2110.10326</loc><lastmod>2022-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentanglement-of-emotional-style-and-speaker-identity-for-expressive-voice-conversion-2110.10326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentanglement-of-emotional-style-and-speaker-identity-for-expressive-voice-conversion-2110.10326"/></url>
<url><loc>https://scifaro.com/en/abs/one-model-to-enhance-them-all-array-geometry-agnostic-multi-channel-personalized-speech-enhancement-2110.10330</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-model-to-enhance-them-all-array-geometry-agnostic-multi-channel-personalized-speech-enhancement-2110.10330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-model-to-enhance-them-all-array-geometry-agnostic-multi-channel-personalized-speech-enhancement-2110.10330"/></url>
<url><loc>https://scifaro.com/en/abs/real-m-towards-speech-separation-on-real-mixtures-2110.10812</loc><lastmod>2021-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-m-towards-speech-separation-on-real-mixtures-2110.10812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-m-towards-speech-separation-on-real-mixtures-2110.10812"/></url>
<url><loc>https://scifaro.com/en/abs/rct-random-consistency-training-for-semi-supervised-sound-event-detection-2110.11144</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rct-random-consistency-training-for-semi-supervised-sound-event-detection-2110.11144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rct-random-consistency-training-for-semi-supervised-sound-event-detection-2110.11144"/></url>
<url><loc>https://scifaro.com/en/abs/objective-measures-of-perceptual-audio-quality-reviewed-an-evaluation-of-their-application-domain-dependence-2110.11438</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-measures-of-perceptual-audio-quality-reviewed-an-evaluation-of-their-application-domain-dependence-2110.11438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-measures-of-perceptual-audio-quality-reviewed-an-evaluation-of-their-application-domain-dependence-2110.11438"/></url>
<url><loc>https://scifaro.com/en/abs/synt-utilizing-imperfect-synthetic-data-to-improve-speech-recognition-2110.11479</loc><lastmod>2021-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synt-utilizing-imperfect-synthetic-data-to-improve-speech-recognition-2110.11479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synt-utilizing-imperfect-synthetic-data-to-improve-speech-recognition-2110.11479"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-acoustic-features-in-arabic-speaker-identification-under-noisy-environmental-conditions-2110.12304</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-acoustic-features-in-arabic-speaker-identification-under-noisy-environmental-conditions-2110.12304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-acoustic-features-in-arabic-speaker-identification-under-noisy-environmental-conditions-2110.12304"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-and-interpretable-singing-voice-decomposition-via-assem-vc-2110.12676</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-and-interpretable-singing-voice-decomposition-via-assem-vc-2110.12676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-and-interpretable-singing-voice-decomposition-via-assem-vc-2110.12676"/></url>
<url><loc>https://scifaro.com/en/abs/on-synchronization-of-wireless-acoustic-sensor-networks-in-the-presence-of-time-varying-sampling-rate-offsets-and-speaker-changes-2110.12820</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-synchronization-of-wireless-acoustic-sensor-networks-in-the-presence-of-time-varying-sampling-rate-offsets-and-speaker-changes-2110.12820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-synchronization-of-wireless-acoustic-sensor-networks-in-the-presence-of-time-varying-sampling-rate-offsets-and-speaker-changes-2110.12820"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-impact-sounding-acoustic-inspection-of-concrete-structure-2110.13125</loc><lastmod>2021-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-impact-sounding-acoustic-inspection-of-concrete-structure-2110.13125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-impact-sounding-acoustic-inspection-of-concrete-structure-2110.13125"/></url>
<url><loc>https://scifaro.com/en/abs/towards-audio-domain-adaptation-for-acoustic-scene-classification-using-disentanglement-learning-2110.13586</loc><lastmod>2021-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-audio-domain-adaptation-for-acoustic-scene-classification-using-disentanglement-learning-2110.13586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-audio-domain-adaptation-for-acoustic-scene-classification-using-disentanglement-learning-2110.13586"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speaker-representation-with-semi-supervised-learning-approach-for-speaker-profiling-2110.13653</loc><lastmod>2021-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speaker-representation-with-semi-supervised-learning-approach-for-speaker-profiling-2110.13653"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speaker-representation-with-semi-supervised-learning-approach-for-speaker-profiling-2110.13653"/></url>
<url><loc>https://scifaro.com/en/abs/closing-the-gap-between-time-domain-multi-channel-speech-enhancement-on-real-and-simulation-conditions-2110.14139</loc><lastmod>2021-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/closing-the-gap-between-time-domain-multi-channel-speech-enhancement-on-real-and-simulation-conditions-2110.14139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/closing-the-gap-between-time-domain-multi-channel-speech-enhancement-on-real-and-simulation-conditions-2110.14139"/></url>
<url><loc>https://scifaro.com/en/abs/separating-long-form-speech-with-group-wise-permutation-invariant-training-2110.14142</loc><lastmod>2021-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separating-long-form-speech-with-group-wise-permutation-invariant-training-2110.14142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separating-long-form-speech-with-group-wise-permutation-invariant-training-2110.14142"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-separation-with-recurrent-selective-attention-network-2110.14838</loc><lastmod>2021-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-separation-with-recurrent-selective-attention-network-2110.14838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-separation-with-recurrent-selective-attention-network-2110.14838"/></url>
<url><loc>https://scifaro.com/en/abs/torchaudio-building-blocks-for-audio-and-speech-processing-2110.15018</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/torchaudio-building-blocks-for-audio-and-speech-processing-2110.15018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/torchaudio-building-blocks-for-audio-and-speech-processing-2110.15018"/></url>
<url><loc>https://scifaro.com/en/abs/sa-sdr-a-novel-loss-function-for-separation-of-meeting-style-data-2110.15581</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-sdr-a-novel-loss-function-for-separation-of-meeting-style-data-2110.15581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-sdr-a-novel-loss-function-for-separation-of-meeting-style-data-2110.15581"/></url>
<url><loc>https://scifaro.com/en/abs/fusing-asr-outputs-in-joint-training-for-speech-emotion-recognition-2110.15684</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusing-asr-outputs-in-joint-training-for-speech-emotion-recognition-2110.15684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusing-asr-outputs-in-joint-training-for-speech-emotion-recognition-2110.15684"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-joint-decoding-based-multi-talker-speech-recognition-with-dnn-acoustic-model-2111.00009</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-joint-decoding-based-multi-talker-speech-recognition-with-dnn-acoustic-model-2111.00009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-joint-decoding-based-multi-talker-speech-recognition-with-dnn-acoustic-model-2111.00009"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-tracking-based-training-of-deep-learning-sound-source-localizers-2111.00030</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-tracking-based-training-of-deep-learning-sound-source-localizers-2111.00030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-tracking-based-training-of-deep-learning-sound-source-localizers-2111.00030"/></url>
<url><loc>https://scifaro.com/en/abs/cross-attention-conformer-for-context-modeling-in-speech-enhancement-for-asr-2111.00127</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-attention-conformer-for-context-modeling-in-speech-enhancement-for-asr-2111.00127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-attention-conformer-for-context-modeling-in-speech-enhancement-for-asr-2111.00127"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speech-denoising-using-only-noisy-audio-signals-2111.00242</loc><lastmod>2023-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speech-denoising-using-only-noisy-audio-signals-2111.00242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speech-denoising-using-only-noisy-audio-signals-2111.00242"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speaker-counting-in-a-cocktail-party-scenario-using-attention-guided-convolutional-neural-network-2111.00316</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speaker-counting-in-a-cocktail-party-scenario-using-attention-guided-convolutional-neural-network-2111.00316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speaker-counting-in-a-cocktail-party-scenario-using-attention-guided-convolutional-neural-network-2111.00316"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioning-of-acoustic-models-using-affine-transformation-for-multi-speaker-speech-recognition-2111.00320</loc><lastmod>2021-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioning-of-acoustic-models-using-affine-transformation-for-multi-speaker-speech-recognition-2111.00320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioning-of-acoustic-models-using-affine-transformation-for-multi-speaker-speech-recognition-2111.00320"/></url>
<url><loc>https://scifaro.com/en/abs/snri-target-training-for-joint-speech-enhancement-and-recognition-2111.00764</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snri-target-training-for-joint-speech-enhancement-and-recognition-2111.00764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snri-target-training-for-joint-speech-enhancement-and-recognition-2111.00764"/></url>
<url><loc>https://scifaro.com/en/abs/avaspeech-smad-a-strongly-labelled-speech-and-music-activity-detection-dataset-with-label-co-occurrence-2111.01320</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avaspeech-smad-a-strongly-labelled-speech-and-music-activity-detection-dataset-with-label-co-occurrence-2111.01320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avaspeech-smad-a-strongly-labelled-speech-and-music-activity-detection-dataset-with-label-co-occurrence-2111.01320"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-transfer-for-speech-processing-using-acoustic-language-similarity-2111.01326</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-transfer-for-speech-processing-using-acoustic-language-similarity-2111.01326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-transfer-for-speech-processing-using-acoustic-language-similarity-2111.01326"/></url>
<url><loc>https://scifaro.com/en/abs/design-and-evaluation-of-active-noise-control-on-machinery-noise-2111.01652</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-and-evaluation-of-active-noise-control-on-machinery-noise-2111.01652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-and-evaluation-of-active-noise-control-on-machinery-noise-2111.01652"/></url>
<url><loc>https://scifaro.com/en/abs/recent-advances-in-end-to-end-automatic-speech-recognition-2111.01690</loc><lastmod>2022-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-advances-in-end-to-end-automatic-speech-recognition-2111.01690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-advances-in-end-to-end-automatic-speech-recognition-2111.01690"/></url>
<url><loc>https://scifaro.com/en/abs/multi-input-architecture-and-disentangled-representation-learning-for-multi-dimensional-modeling-of-music-similarity-2111.01710</loc><lastmod>2021-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-input-architecture-and-disentangled-representation-learning-for-multi-dimensional-modeling-of-music-similarity-2111.01710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-input-architecture-and-disentangled-representation-learning-for-multi-dimensional-modeling-of-music-similarity-2111.01710"/></url>
<url><loc>https://scifaro.com/en/abs/reduction-of-subjective-listening-effort-for-tv-broadcast-signals-with-recurrent-neural-networks-2111.01914</loc><lastmod>2021-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduction-of-subjective-listening-effort-for-tv-broadcast-signals-with-recurrent-neural-networks-2111.01914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduction-of-subjective-listening-effort-for-tv-broadcast-signals-with-recurrent-neural-networks-2111.01914"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-non-intrusive-multi-objective-speech-assessment-model-with-cross-domain-features-2111.02363</loc><lastmod>2024-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-non-intrusive-multi-objective-speech-assessment-model-with-cross-domain-features-2111.02363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-non-intrusive-multi-objective-speech-assessment-model-with-cross-domain-features-2111.02363"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-discrete-and-soft-speech-units-for-improved-voice-conversion-2111.02392</loc><lastmod>2022-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-discrete-and-soft-speech-units-for-improved-voice-conversion-2111.02392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-discrete-and-soft-speech-units-for-improved-voice-conversion-2111.02392"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-can-improve-asr-in-very-low-resource-settings-2111.02674</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-can-improve-asr-in-very-low-resource-settings-2111.02674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-can-improve-asr-in-very-low-resource-settings-2111.02674"/></url>
<url><loc>https://scifaro.com/en/abs/parsinorm-a-persian-toolkit-for-speech-processing-normalization-2111.03470</loc><lastmod>2021-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parsinorm-a-persian-toolkit-for-speech-processing-normalization-2111.03470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parsinorm-a-persian-toolkit-for-speech-processing-normalization-2111.03470"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-extraction-independent-vector-extraction-guided-by-supervised-speaker-identification-2111.03482</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-extraction-independent-vector-extraction-guided-by-supervised-speaker-identification-2111.03482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-extraction-independent-vector-extraction-guided-by-supervised-speaker-identification-2111.03482"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-spectrogram-and-waveform-source-separation-2111.03600</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-spectrogram-and-waveform-source-separation-2111.03600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-spectrogram-and-waveform-source-separation-2111.03600"/></url>
<url><loc>https://scifaro.com/en/abs/class-token-and-knowledge-distillation-for-multi-head-self-attention-speaker-verification-systems-2111.03842</loc><lastmod>2023-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/class-token-and-knowledge-distillation-for-multi-head-self-attention-speaker-verification-systems-2111.03842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/class-token-and-knowledge-distillation-for-multi-head-self-attention-speaker-verification-systems-2111.03842"/></url>
<url><loc>https://scifaro.com/en/abs/deep-noise-suppression-maximizing-non-differentiable-pesq-mediated-by-a-non-intrusive-pesqnet-2111.03847</loc><lastmod>2021-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-noise-suppression-maximizing-non-differentiable-pesq-mediated-by-a-non-intrusive-pesqnet-2111.03847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-noise-suppression-maximizing-non-differentiable-pesq-mediated-by-a-non-intrusive-pesqnet-2111.03847"/></url>
<url><loc>https://scifaro.com/en/abs/limuse-lightweight-multi-modal-speaker-extraction-2111.04063</loc><lastmod>2022-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/limuse-lightweight-multi-modal-speaker-extraction-2111.04063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/limuse-lightweight-multi-modal-speaker-extraction-2111.04063"/></url>
<url><loc>https://scifaro.com/en/abs/inter-channel-conv-tasnet-for-multichannel-speech-enhancement-2111.04312</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inter-channel-conv-tasnet-for-multichannel-speech-enhancement-2111.04312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inter-channel-conv-tasnet-for-multichannel-speech-enhancement-2111.04312"/></url>
<url><loc>https://scifaro.com/en/abs/rawboost-a-raw-data-boosting-and-augmentation-method-applied-to-automatic-speaker-verification-anti-spoofing-2111.04433</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rawboost-a-raw-data-boosting-and-augmentation-method-applied-to-automatic-speaker-verification-anti-spoofing-2111.04433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rawboost-a-raw-data-boosting-and-augmentation-method-applied-to-automatic-speaker-verification-anti-spoofing-2111.04433"/></url>
<url><loc>https://scifaro.com/en/abs/learning-filterbanks-for-end-to-end-acoustic-beamforming-2111.04614</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-filterbanks-for-end-to-end-acoustic-beamforming-2111.04614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-filterbanks-for-end-to-end-acoustic-beamforming-2111.04614"/></url>
<url><loc>https://scifaro.com/en/abs/a-hemispheric-two-channel-code-accounts-for-binaural-unmasking-in-humans-2111.04637</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hemispheric-two-channel-code-accounts-for-binaural-unmasking-in-humans-2111.04637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hemispheric-two-channel-code-accounts-for-binaural-unmasking-in-humans-2111.04637"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-prosody-control-for-speech-generation-2111.04730</loc><lastmod>2021-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-prosody-control-for-speech-generation-2111.04730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-prosody-control-for-speech-generation-2111.04730"/></url>
<url><loc>https://scifaro.com/en/abs/joint-neural-aec-and-beamforming-with-double-talk-detection-2111.04904</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-neural-aec-and-beamforming-with-double-talk-detection-2111.04904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-neural-aec-and-beamforming-with-double-talk-detection-2111.04904"/></url>
<url><loc>https://scifaro.com/en/abs/hasa-net-a-non-intrusive-hearing-aid-speech-assessment-network-2111.05691</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hasa-net-a-non-intrusive-hearing-aid-speech-assessment-network-2111.05691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hasa-net-a-non-intrusive-hearing-aid-speech-assessment-network-2111.05691"/></url>
<url><loc>https://scifaro.com/en/abs/ossem-one-shot-speaker-adaptive-speech-enhancement-using-meta-learning-2111.05703</loc><lastmod>2021-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ossem-one-shot-speaker-adaptive-speech-enhancement-using-meta-learning-2111.05703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ossem-one-shot-speaker-adaptive-speech-enhancement-using-meta-learning-2111.05703"/></url>
<url><loc>https://scifaro.com/en/abs/uformer-a-unet-based-dilated-complex-real-dual-path-conformer-network-for-simultaneous-speech-enhancement-and-dereverberation-2111.06015</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uformer-a-unet-based-dilated-complex-real-dual-path-conformer-network-for-simultaneous-speech-enhancement-and-dereverberation-2111.06015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uformer-a-unet-based-dilated-complex-real-dual-path-conformer-network-for-simultaneous-speech-enhancement-and-dereverberation-2111.06015"/></url>
<url><loc>https://scifaro.com/en/abs/multisv-dataset-for-far-field-multi-channel-speaker-verification-2111.06458</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multisv-dataset-for-far-field-multi-channel-speaker-verification-2111.06458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multisv-dataset-for-far-field-multi-channel-speaker-verification-2111.06458"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-physical-parameters-for-anomalous-sound-detection-under-domain-shifts-2111.06539</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-physical-parameters-for-anomalous-sound-detection-under-domain-shifts-2111.06539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-physical-parameters-for-anomalous-sound-detection-under-domain-shifts-2111.06539"/></url>
<url><loc>https://scifaro.com/en/abs/ac-vc-non-parallel-low-latency-phonetic-posteriorgrams-based-voice-conversion-2111.06601</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ac-vc-non-parallel-low-latency-phonetic-posteriorgrams-based-voice-conversion-2111.06601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ac-vc-non-parallel-low-latency-phonetic-posteriorgrams-based-voice-conversion-2111.06601"/></url>
<url><loc>https://scifaro.com/en/abs/hlt-nus-submission-for-2020-nist-conversational-telephone-speech-sre-2111.06671</loc><lastmod>2021-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hlt-nus-submission-for-2020-nist-conversational-telephone-speech-sre-2111.06671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hlt-nus-submission-for-2020-nist-conversational-telephone-speech-sre-2111.06671"/></url>
<url><loc>https://scifaro.com/en/abs/meta-voice-fast-few-shot-style-transfer-for-expressive-voice-cloning-using-meta-learning-2111.07218</loc><lastmod>2021-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-voice-fast-few-shot-style-transfer-for-expressive-voice-cloning-using-meta-learning-2111.07218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-voice-fast-few-shot-style-transfer-for-expressive-voice-cloning-using-meta-learning-2111.07218"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-source-separation-from-anechoic-to-reverberant-environments-2111.07578</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-source-separation-from-anechoic-to-reverberant-environments-2111.07578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-source-separation-from-anechoic-to-reverberant-environments-2111.07578"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-self-supervised-front-ends-for-speech-spoofing-countermeasures-2111.07725</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-self-supervised-front-ends-for-speech-spoofing-countermeasures-2111.07725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-self-supervised-front-ends-for-speech-spoofing-countermeasures-2111.07725"/></url>
<url><loc>https://scifaro.com/en/abs/joint-far-and-near-end-speech-intelligibility-enhancement-based-on-the-approximated-speech-intelligibility-index-2111.07759</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-far-and-near-end-speech-intelligibility-enhancement-based-on-the-approximated-speech-intelligibility-index-2111.07759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-far-and-near-end-speech-intelligibility-enhancement-based-on-the-approximated-speech-intelligibility-index-2111.07759"/></url>
<url><loc>https://scifaro.com/en/abs/biologically-inspired-speech-emotion-recognition-2111.08112</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/biologically-inspired-speech-emotion-recognition-2111.08112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/biologically-inspired-speech-emotion-recognition-2111.08112"/></url>
<url><loc>https://scifaro.com/en/abs/speech-prediction-using-an-adaptive-recurrent-neural-network-with-application-to-packet-loss-concealment-2111.08116</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-prediction-using-an-adaptive-recurrent-neural-network-with-application-to-packet-loss-concealment-2111.08116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-prediction-using-an-adaptive-recurrent-neural-network-with-application-to-packet-loss-concealment-2111.08116"/></url>
<url><loc>https://scifaro.com/en/abs/salsa-lite-a-fast-and-effective-feature-for-polyphonic-sound-event-localization-and-detection-with-microphone-arrays-2111.08192</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salsa-lite-a-fast-and-effective-feature-for-polyphonic-sound-event-localization-and-detection-with-microphone-arrays-2111.08192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salsa-lite-a-fast-and-effective-feature-for-polyphonic-sound-event-localization-and-detection-with-microphone-arrays-2111.08192"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-multi-hypothesis-fusion-for-speech-summarization-2111.08201</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-multi-hypothesis-fusion-for-speech-summarization-2111.08201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-multi-hypothesis-fusion-for-speech-summarization-2111.08201"/></url>
<url><loc>https://scifaro.com/en/abs/s-dccrn-super-wide-band-dccrn-with-learnable-complex-feature-for-speech-enhancement-2111.08387</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s-dccrn-super-wide-band-dccrn-with-learnable-complex-feature-for-speech-enhancement-2111.08387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s-dccrn-super-wide-band-dccrn-with-learnable-complex-feature-for-speech-enhancement-2111.08387"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-separation-using-soft-minimum-permutation-invariant-training-2111.08635</loc><lastmod>2021-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-separation-using-soft-minimum-permutation-invariant-training-2111.08635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-separation-using-soft-minimum-permutation-invariant-training-2111.08635"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-speech-recognition-embedding-and-disentanglement-losses-2111.08678</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-speech-recognition-embedding-and-disentanglement-losses-2111.08678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-speech-recognition-embedding-and-disentanglement-losses-2111.08678"/></url>
<url><loc>https://scifaro.com/en/abs/bloom-net-blockwise-optimization-for-masking-networks-toward-scalable-and-efficient-speech-enhancement-2111.09372</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bloom-net-blockwise-optimization-for-masking-networks-toward-scalable-and-efficient-speech-enhancement-2111.09372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bloom-net-blockwise-optimization-for-masking-networks-toward-scalable-and-efficient-speech-enhancement-2111.09372"/></url>
<url><loc>https://scifaro.com/en/abs/a-conformer-based-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-speech-separation-2111.09935</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-conformer-based-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-speech-separation-2111.09935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-conformer-based-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-speech-separation-2111.09935"/></url>
<url><loc>https://scifaro.com/en/abs/towards-measuring-fairness-in-speech-recognition-casual-conversations-dataset-transcriptions-2111.09983</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-measuring-fairness-in-speech-recognition-casual-conversations-dataset-transcriptions-2111.09983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-measuring-fairness-in-speech-recognition-casual-conversations-dataset-transcriptions-2111.09983"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-streaming-models-and-data-augmentation-methods-for-robust-speech-recognition-2111.10043</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-streaming-models-and-data-augmentation-methods-for-robust-speech-recognition-2111.10043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-streaming-models-and-data-augmentation-methods-for-robust-speech-recognition-2111.10043"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-transfer-learning-for-language-expansion-of-end-to-end-speech-recognition-models-to-low-resource-languages-2111.10047</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-transfer-learning-for-language-expansion-of-end-to-end-speech-recognition-models-to-low-resource-languages-2111.10047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-transfer-learning-for-language-expansion-of-end-to-end-speech-recognition-models-to-low-resource-languages-2111.10047"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-emotion-recognition-with-high-level-speech-and-text-features-2111.10202</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-with-high-level-speech-and-text-features-2111.10202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-emotion-recognition-with-high-level-speech-and-text-features-2111.10202"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-study-of-speech-analysis-methods-to-predict-parkinson-s-disease-2111.10207</loc><lastmod>2021-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-study-of-speech-analysis-methods-to-predict-parkinson-s-disease-2111.10207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-study-of-speech-analysis-methods-to-predict-parkinson-s-disease-2111.10207"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-end-to-end-speech-recognition-for-voice-search-in-hindi-and-english-2111.10208</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-end-to-end-speech-recognition-for-voice-search-in-hindi-and-english-2111.10208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-end-to-end-speech-recognition-for-voice-search-in-hindi-and-english-2111.10208"/></url>
<url><loc>https://scifaro.com/en/abs/switching-independent-vector-analysis-and-its-extension-to-blind-and-spatially-guided-convolutional-beamforming-algorithms-2111.10574</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/switching-independent-vector-analysis-and-its-extension-to-blind-and-spatially-guided-convolutional-beamforming-algorithms-2111.10574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/switching-independent-vector-analysis-and-its-extension-to-blind-and-spatially-guided-convolutional-beamforming-algorithms-2111.10574"/></url>
<url><loc>https://scifaro.com/en/abs/active-restoration-of-lost-audio-signals-using-machine-learning-and-latent-information-2111.10891</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-restoration-of-lost-audio-signals-using-machine-learning-and-latent-information-2111.10891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-restoration-of-lost-audio-signals-using-machine-learning-and-latent-information-2111.10891"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-reproduction-with-weighted-mode-matching-and-infinite-dimensional-harmonic-analysis-an-experimental-evaluation-2111.11045</loc><lastmod>2021-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-reproduction-with-weighted-mode-matching-and-infinite-dimensional-harmonic-analysis-an-experimental-evaluation-2111.11045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-reproduction-with-weighted-mode-matching-and-infinite-dimensional-harmonic-analysis-an-experimental-evaluation-2111.11045"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-noise-suppression-losses-on-speech-distortion-and-asr-performance-2111.11606</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-noise-suppression-losses-on-speech-distortion-and-asr-performance-2111.11606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-noise-suppression-losses-on-speech-distortion-and-asr-performance-2111.11606"/></url>
<url><loc>https://scifaro.com/en/abs/speechmoe2-mixture-of-experts-model-with-improved-routing-2111.11831</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechmoe2-mixture-of-experts-model-with-improved-routing-2111.11831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechmoe2-mixture-of-experts-model-with-improved-routing-2111.11831"/></url>
<url><loc>https://scifaro.com/en/abs/dataset-of-spatial-room-impulse-responses-in-a-variable-acoustics-room-for-six-degrees-of-freedom-rendering-and-analysis-2111.11882</loc><lastmod>2021-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dataset-of-spatial-room-impulse-responses-in-a-variable-acoustics-room-for-six-degrees-of-freedom-rendering-and-analysis-2111.11882"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dataset-of-spatial-room-impulse-responses-in-a-variable-acoustics-room-for-six-degrees-of-freedom-rendering-and-analysis-2111.11882"/></url>
<url><loc>https://scifaro.com/en/abs/kuielab-mdx-net-a-two-stream-neural-network-for-music-demixing-2111.12203</loc><lastmod>2021-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kuielab-mdx-net-a-two-stream-neural-network-for-music-demixing-2111.12203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kuielab-mdx-net-a-two-stream-neural-network-for-music-demixing-2111.12203"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-voice-conversion-for-style-transfer-based-on-speaker-adaptation-2111.12277</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-voice-conversion-for-style-transfer-based-on-speaker-adaptation-2111.12277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-voice-conversion-for-style-transfer-based-on-speaker-adaptation-2111.12277"/></url>
<url><loc>https://scifaro.com/en/abs/lightsaft-lightweight-latent-source-aware-frequency-transform-for-source-separation-2111.12516</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightsaft-lightweight-latent-source-aware-frequency-transform-for-source-separation-2111.12516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightsaft-lightweight-latent-source-aware-frequency-transform-for-source-separation-2111.12516"/></url>
<url><loc>https://scifaro.com/en/abs/learning-source-aware-representations-of-music-in-a-discrete-latent-space-2111.13321</loc><lastmod>2021-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-source-aware-representations-of-music-in-a-discrete-latent-space-2111.13321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-source-aware-representations-of-music-in-a-discrete-latent-space-2111.13321"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-online-speaker-diarization-with-graph-based-label-generation-2111.13803</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-online-speaker-diarization-with-graph-based-label-generation-2111.13803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-online-speaker-diarization-with-graph-based-label-generation-2111.13803"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-with-jukebox-for-music-source-separation-2111.14200</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-with-jukebox-for-music-source-separation-2111.14200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-with-jukebox-for-music-source-separation-2111.14200"/></url>
<url><loc>https://scifaro.com/en/abs/do-we-still-need-automatic-speech-recognition-for-spoken-language-understanding-2111.14842</loc><lastmod>2021-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-we-still-need-automatic-speech-recognition-for-spoken-language-understanding-2111.14842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-we-still-need-automatic-speech-recognition-for-spoken-language-understanding-2111.14842"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-through-cross-modal-conditional-teacher-student-training-for-speech-emotion-recognition-2112.00158</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-through-cross-modal-conditional-teacher-student-training-for-speech-emotion-recognition-2112.00158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-through-cross-modal-conditional-teacher-student-training-for-speech-emotion-recognition-2112.00158"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-lexical-skills-from-oral-reading-with-acoustic-measures-2112.00635</loc><lastmod>2021-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-lexical-skills-from-oral-reading-with-acoustic-measures-2112.00635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-lexical-skills-from-oral-reading-with-acoustic-measures-2112.00635"/></url>
<url><loc>https://scifaro.com/en/abs/a-higher-order-minkowski-loss-for-improved-prediction-ability-of-acoustic-model-in-asr-2112.01023</loc><lastmod>2021-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-higher-order-minkowski-loss-for-improved-prediction-ability-of-acoustic-model-in-asr-2112.01023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-higher-order-minkowski-loss-for-improved-prediction-ability-of-acoustic-model-in-asr-2112.01023"/></url>
<url><loc>https://scifaro.com/en/abs/a-mixture-of-expert-based-deep-neural-network-for-improved-asr-2112.01025</loc><lastmod>2021-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-mixture-of-expert-based-deep-neural-network-for-improved-asr-2112.01025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-mixture-of-expert-based-deep-neural-network-for-improved-asr-2112.01025"/></url>
<url><loc>https://scifaro.com/en/abs/toward-real-world-voice-disorder-classification-2112.02538</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-real-world-voice-disorder-classification-2112.02538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-real-world-voice-disorder-classification-2112.02538"/></url>
<url><loc>https://scifaro.com/en/abs/steerable-discovery-of-neural-audio-effects-2112.02926</loc><lastmod>2021-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steerable-discovery-of-neural-audio-effects-2112.02926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steerable-discovery-of-neural-audio-effects-2112.02926"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speech-representation-learning-via-flow-based-embedding-regularization-2112.03454</loc><lastmod>2021-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speech-representation-learning-via-flow-based-embedding-regularization-2112.03454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speech-representation-learning-via-flow-based-embedding-regularization-2112.03454"/></url>
<url><loc>https://scifaro.com/en/abs/a-time-domain-real-valued-generalized-wiener-filter-for-multi-channel-neural-separation-systems-2112.03533</loc><lastmod>2022-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-time-domain-real-valued-generalized-wiener-filter-for-multi-channel-neural-separation-systems-2112.03533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-time-domain-real-valued-generalized-wiener-filter-for-multi-channel-neural-separation-systems-2112.03533"/></url>
<url><loc>https://scifaro.com/en/abs/danna-sep-unite-to-separate-them-all-2112.03752</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/danna-sep-unite-to-separate-them-all-2112.03752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/danna-sep-unite-to-separate-them-all-2112.03752"/></url>
<url><loc>https://scifaro.com/en/abs/training-end-to-end-speech-to-text-models-on-mobile-phones-2112.03871</loc><lastmod>2021-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-end-to-end-speech-to-text-models-on-mobile-phones-2112.03871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-end-to-end-speech-to-text-models-on-mobile-phones-2112.03871"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-native-american-english-speech-recognition-by-indian-listeners-with-varying-word-familiarity-level-2112.04151</loc><lastmod>2021-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-native-american-english-speech-recognition-by-indian-listeners-with-varying-word-familiarity-level-2112.04151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-native-american-english-speech-recognition-by-indian-listeners-with-varying-word-familiarity-level-2112.04151"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speaker-verification-with-simple-siamese-network-and-self-supervised-regularization-2112.04459</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speaker-verification-with-simple-siamese-network-and-self-supervised-regularization-2112.04459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speaker-verification-with-simple-siamese-network-and-self-supervised-regularization-2112.04459"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-effect-of-coding-artifacts-on-acoustic-scene-classification-2112.04841</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-effect-of-coding-artifacts-on-acoustic-scene-classification-2112.04841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-effect-of-coding-artifacts-on-acoustic-scene-classification-2112.04841"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-alexa-device-arbitration-2112.04914</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-alexa-device-arbitration-2112.04914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-alexa-device-arbitration-2112.04914"/></url>
<url><loc>https://scifaro.com/en/abs/a-training-framework-for-stereo-aware-speech-enhancement-using-deep-neural-networks-2112.04939</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-training-framework-for-stereo-aware-speech-enhancement-using-deep-neural-networks-2112.04939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-training-framework-for-stereo-aware-speech-enhancement-using-deep-neural-networks-2112.04939"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-and-non-harmonic-based-noisy-reverberant-speech-enhancement-in-time-domain-2112.04949</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-and-non-harmonic-based-noisy-reverberant-speech-enhancement-in-time-domain-2112.04949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-and-non-harmonic-based-noisy-reverberant-speech-enhancement-in-time-domain-2112.04949"/></url>
<url><loc>https://scifaro.com/en/abs/x-vector-based-voice-activity-detection-for-multi-genre-broadcast-speech-to-text-2112.05016</loc><lastmod>2021-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-vector-based-voice-activity-detection-for-multi-genre-broadcast-speech-to-text-2112.05016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-vector-based-voice-activity-detection-for-multi-genre-broadcast-speech-to-text-2112.05016"/></url>
<url><loc>https://scifaro.com/en/abs/learning-based-personal-speech-enhancement-for-teleconferencing-by-exploiting-spatial-spectral-features-2112.05686</loc><lastmod>2022-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-based-personal-speech-enhancement-for-teleconferencing-by-exploiting-spatial-spectral-features-2112.05686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-based-personal-speech-enhancement-for-teleconferencing-by-exploiting-spatial-spectral-features-2112.05686"/></url>
<url><loc>https://scifaro.com/en/abs/directed-speech-separation-for-automatic-speech-recognition-of-long-form-conversational-speech-2112.05863</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/directed-speech-separation-for-automatic-speech-recognition-of-long-form-conversational-speech-2112.05863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/directed-speech-separation-for-automatic-speech-recognition-of-long-form-conversational-speech-2112.05863"/></url>
<url><loc>https://scifaro.com/en/abs/importantaug-a-data-augmentation-agent-for-speech-2112.07156</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/importantaug-a-data-augmentation-agent-for-speech-2112.07156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/importantaug-a-data-augmentation-agent-for-speech-2112.07156"/></url>
<url><loc>https://scifaro.com/en/abs/spatiogram-a-phase-based-directional-angular-measure-and-perceptual-weighting-for-ensemble-source-width-2112.07216</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatiogram-a-phase-based-directional-angular-measure-and-perceptual-weighting-for-ensemble-source-width-2112.07216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatiogram-a-phase-based-directional-angular-measure-and-perceptual-weighting-for-ensemble-source-width-2112.07216"/></url>
<url><loc>https://scifaro.com/en/abs/improving-hybrid-ctc-attention-end-to-end-speech-recognition-with-pretrained-acoustic-and-language-model-2112.07254</loc><lastmod>2021-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-hybrid-ctc-attention-end-to-end-speech-recognition-with-pretrained-acoustic-and-language-model-2112.07254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-hybrid-ctc-attention-end-to-end-speech-recognition-with-pretrained-acoustic-and-language-model-2112.07254"/></url>
<url><loc>https://scifaro.com/en/abs/robustifying-automatic-speech-recognition-by-extracting-slowly-varying-features-2112.07400</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustifying-automatic-speech-recognition-by-extracting-slowly-varying-features-2112.07400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustifying-automatic-speech-recognition-by-extracting-slowly-varying-features-2112.07400"/></url>
<url><loc>https://scifaro.com/en/abs/visualizing-ensemble-predictions-of-music-mood-2112.07627</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizing-ensemble-predictions-of-music-mood-2112.07627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizing-ensemble-predictions-of-music-mood-2112.07627"/></url>
<url><loc>https://scifaro.com/en/abs/rawnext-speaker-verification-system-for-variable-duration-utterances-with-deep-layer-aggregation-and-extended-dynamic-scaling-policies-2112.07935</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rawnext-speaker-verification-system-for-variable-duration-utterances-with-deep-layer-aggregation-and-extended-dynamic-scaling-policies-2112.07935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rawnext-speaker-verification-system-for-variable-duration-utterances-with-deep-layer-aggregation-and-extended-dynamic-scaling-policies-2112.07935"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-speech-recognition-with-intermediate-layer-supervision-2112.08778</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-recognition-with-intermediate-layer-supervision-2112.08778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-recognition-with-intermediate-layer-supervision-2112.08778"/></url>
<url><loc>https://scifaro.com/en/abs/bootstrap-equilibrium-and-probabilistic-speaker-representation-learning-for-self-supervised-speaker-verification-2112.08929</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bootstrap-equilibrium-and-probabilistic-speaker-representation-learning-for-self-supervised-speaker-verification-2112.08929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bootstrap-equilibrium-and-probabilistic-speaker-representation-learning-for-self-supervised-speaker-verification-2112.08929"/></url>
<url><loc>https://scifaro.com/en/abs/object-based-synthesis-of-scraping-and-rolling-sounds-based-on-non-linear-physical-constraints-2112.08984</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/object-based-synthesis-of-scraping-and-rolling-sounds-based-on-non-linear-physical-constraints-2112.08984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/object-based-synthesis-of-scraping-and-rolling-sounds-based-on-non-linear-physical-constraints-2112.08984"/></url>
<url><loc>https://scifaro.com/en/abs/bioacoustic-event-detection-with-prototypical-networks-and-data-augmentation-2112.09006</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bioacoustic-event-detection-with-prototypical-networks-and-data-augmentation-2112.09006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bioacoustic-event-detection-with-prototypical-networks-and-data-augmentation-2112.09006"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-species-agnostic-bird-activity-detection-2112.09042</loc><lastmod>2021-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-species-agnostic-bird-activity-detection-2112.09042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-species-agnostic-bird-activity-detection-2112.09042"/></url>
<url><loc>https://scifaro.com/en/abs/audio-retrieval-with-natural-language-queries-a-benchmark-study-2112.09418</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-retrieval-with-natural-language-queries-a-benchmark-study-2112.09418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-retrieval-with-natural-language-queries-a-benchmark-study-2112.09418"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-monolingual-end-to-end-automatic-speech-recognition-2112.09427</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-monolingual-end-to-end-automatic-speech-recognition-2112.09427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-monolingual-end-to-end-automatic-speech-recognition-2112.09427"/></url>
<url><loc>https://scifaro.com/en/abs/dialog-in-broadcasting-first-field-tests-using-deep-learning-based-dialogue-enhancement-2112.09494</loc><lastmod>2021-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialog-in-broadcasting-first-field-tests-using-deep-learning-based-dialogue-enhancement-2112.09494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialog-in-broadcasting-first-field-tests-using-deep-learning-based-dialogue-enhancement-2112.09494"/></url>
<url><loc>https://scifaro.com/en/abs/noisy-speech-based-temporal-decomposition-to-improve-fundamental-frequency-estimation-2112.09896</loc><lastmod>2021-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisy-speech-based-temporal-decomposition-to-improve-fundamental-frequency-estimation-2112.09896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisy-speech-based-temporal-decomposition-to-improve-fundamental-frequency-estimation-2112.09896"/></url>
<url><loc>https://scifaro.com/en/abs/multi-turn-rnn-t-for-streaming-recognition-of-multi-party-speech-2112.10200</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-turn-rnn-t-for-streaming-recognition-of-multi-party-speech-2112.10200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-turn-rnn-t-for-streaming-recognition-of-multi-party-speech-2112.10200"/></url>
<url><loc>https://scifaro.com/en/abs/multi-singer-fast-multi-singer-singing-voice-vocoder-with-a-large-scale-corpus-2112.10358</loc><lastmod>2021-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-singer-fast-multi-singer-singing-voice-vocoder-with-a-large-scale-corpus-2112.10358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-singer-fast-multi-singer-singing-voice-vocoder-with-a-large-scale-corpus-2112.10358"/></url>
<url><loc>https://scifaro.com/en/abs/augmented-contrastive-self-supervised-learning-for-audio-invariant-representations-2112.10950</loc><lastmod>2021-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmented-contrastive-self-supervised-learning-for-audio-invariant-representations-2112.10950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmented-contrastive-self-supervised-learning-for-audio-invariant-representations-2112.10950"/></url>
<url><loc>https://scifaro.com/en/abs/the-phonetic-footprint-of-parkinson-s-disease-2112.11514</loc><lastmod>2021-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-phonetic-footprint-of-parkinson-s-disease-2112.11514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-phonetic-footprint-of-parkinson-s-disease-2112.11514"/></url>
<url><loc>https://scifaro.com/en/abs/nonnegative-opls-for-supervised-design-of-filter-banks-application-to-image-and-audio-feature-extraction-2112.12280</loc><lastmod>2021-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonnegative-opls-for-supervised-design-of-filter-banks-application-to-image-and-audio-feature-extraction-2112.12280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonnegative-opls-for-supervised-design-of-filter-banks-application-to-image-and-audio-feature-extraction-2112.12280"/></url>
<url><loc>https://scifaro.com/en/abs/are-e2e-asr-models-ready-for-an-industrial-usage-2112.12572</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-e2e-asr-models-ready-for-an-industrial-usage-2112.12572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-e2e-asr-models-ready-for-an-industrial-usage-2112.12572"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-multi-style-text-to-speech-synthesis-with-single-speaker-single-style-training-data-scenarios-2112.12743</loc><lastmod>2021-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-multi-style-text-to-speech-synthesis-with-single-speaker-single-style-training-data-scenarios-2112.12743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-multi-style-text-to-speech-synthesis-with-single-speaker-single-style-training-data-scenarios-2112.12743"/></url>
<url><loc>https://scifaro.com/en/abs/aida-an-active-inference-based-design-agent-for-audio-processing-algorithms-2112.13366</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aida-an-active-inference-based-design-agent-for-audio-processing-algorithms-2112.13366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aida-an-active-inference-based-design-agent-for-audio-processing-algorithms-2112.13366"/></url>
<url><loc>https://scifaro.com/en/abs/dpccn-densely-connected-pyramid-complex-convolutional-network-for-robust-speech-separation-and-extraction-2112.13520</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpccn-densely-connected-pyramid-complex-convolutional-network-for-robust-speech-separation-and-extraction-2112.13520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpccn-densely-connected-pyramid-complex-convolutional-network-for-robust-speech-separation-and-extraction-2112.13520"/></url>
<url><loc>https://scifaro.com/en/abs/task-specific-optimization-of-virtual-channel-linear-prediction-based-speech-dereverberation-front-end-for-far-field-speaker-verification-2112.13569</loc><lastmod>2021-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-specific-optimization-of-virtual-channel-linear-prediction-based-speech-dereverberation-front-end-for-far-field-speaker-verification-2112.13569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-specific-optimization-of-virtual-channel-linear-prediction-based-speech-dereverberation-front-end-for-far-field-speaker-verification-2112.13569"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dialect-arabic-speech-recognition-2112.14678</loc><lastmod>2021-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dialect-arabic-speech-recognition-2112.14678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dialect-arabic-speech-recognition-2112.14678"/></url>
<url><loc>https://scifaro.com/en/abs/iqdubbing-prosody-modeling-based-on-discrete-self-supervised-speech-representation-for-expressive-voice-conversion-2201.00269</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iqdubbing-prosody-modeling-based-on-discrete-self-supervised-speech-representation-for-expressive-voice-conversion-2201.00269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iqdubbing-prosody-modeling-based-on-discrete-self-supervised-speech-representation-for-expressive-voice-conversion-2201.00269"/></url>
<url><loc>https://scifaro.com/en/abs/tfcn-temporal-frequential-convolutional-network-for-single-channel-speech-enhancement-2201.00480</loc><lastmod>2022-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tfcn-temporal-frequential-convolutional-network-for-single-channel-speech-enhancement-2201.00480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tfcn-temporal-frequential-convolutional-network-for-single-channel-speech-enhancement-2201.00480"/></url>
<url><loc>https://scifaro.com/en/abs/signal-aware-direction-of-arrival-estimation-using-attention-mechanisms-2201.00503</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-aware-direction-of-arrival-estimation-using-attention-mechanisms-2201.00503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-aware-direction-of-arrival-estimation-using-attention-mechanisms-2201.00503"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-transformation-of-spoofing-attacks-for-voice-biometrics-2201.01226</loc><lastmod>2022-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-transformation-of-spoofing-attacks-for-voice-biometrics-2201.01226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-transformation-of-spoofing-attacks-for-voice-biometrics-2201.01226"/></url>
<url><loc>https://scifaro.com/en/abs/towards-maximizing-a-perceptual-sweet-spot-2201.01461</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-maximizing-a-perceptual-sweet-spot-2201.01461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-maximizing-a-perceptual-sweet-spot-2201.01461"/></url>
<url><loc>https://scifaro.com/en/abs/formant-tracking-using-quasi-closed-phase-forward-backward-linear-prediction-analysis-and-deep-neural-networks-2201.01525</loc><lastmod>2022-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/formant-tracking-using-quasi-closed-phase-forward-backward-linear-prediction-analysis-and-deep-neural-networks-2201.01525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/formant-tracking-using-quasi-closed-phase-forward-backward-linear-prediction-analysis-and-deep-neural-networks-2201.01525"/></url>
<url><loc>https://scifaro.com/en/abs/using-deep-learning-with-large-aggregated-datasets-for-covid-19-classification-from-cough-2201.01669</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-deep-learning-with-large-aggregated-datasets-for-covid-19-classification-from-cough-2201.01669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-deep-learning-with-large-aggregated-datasets-for-covid-19-classification-from-cough-2201.01669"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-visual-speech-representation-by-masked-multimodal-cluster-prediction-2201.02184</loc><lastmod>2022-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-visual-speech-representation-by-masked-multimodal-cluster-prediction-2201.02184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-visual-speech-representation-by-masked-multimodal-cluster-prediction-2201.02184"/></url>
<url><loc>https://scifaro.com/en/abs/two-pass-end-to-end-asr-model-compression-2201.02741</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-pass-end-to-end-asr-model-compression-2201.02741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-pass-end-to-end-asr-model-compression-2201.02741"/></url>
<url><loc>https://scifaro.com/en/abs/noisy-neonatal-chest-sound-separation-for-high-quality-heart-and-lung-sounds-2201.03211</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noisy-neonatal-chest-sound-separation-for-high-quality-heart-and-lung-sounds-2201.03211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noisy-neonatal-chest-sound-separation-for-high-quality-heart-and-lung-sounds-2201.03211"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-asr-post-processing-system-for-error-correction-and-utterance-rejection-2201.03313</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-asr-post-processing-system-for-error-correction-and-utterance-rejection-2201.03313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-asr-post-processing-system-for-error-correction-and-utterance-rejection-2201.03313"/></url>
<url><loc>https://scifaro.com/en/abs/a-practical-guide-to-logical-access-voice-presentation-attack-detection-2201.03321</loc><lastmod>2022-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-practical-guide-to-logical-access-voice-presentation-attack-detection-2201.03321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-practical-guide-to-logical-access-voice-presentation-attack-detection-2201.03321"/></url>
<url><loc>https://scifaro.com/en/abs/mr-svs-singing-voice-synthesis-with-multi-reference-encoder-2201.03864</loc><lastmod>2022-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mr-svs-singing-voice-synthesis-with-multi-reference-encoder-2201.03864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mr-svs-singing-voice-synthesis-with-multi-reference-encoder-2201.03864"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-enhance-or-not-neural-network-based-switching-of-enhanced-and-observed-signals-for-overlapping-speech-recognition-2201.03881</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-enhance-or-not-neural-network-based-switching-of-enhanced-and-observed-signals-for-overlapping-speech-recognition-2201.03881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-enhance-or-not-neural-network-based-switching-of-enhanced-and-observed-signals-for-overlapping-speech-recognition-2201.03881"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2201.03943</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2201.03943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-for-lf-mmi-trained-time-delay-neural-networks-2201.03943"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-classification-algorithms-for-covid19-detection-using-cough-acoustic-signals-2201.04872</loc><lastmod>2025-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-classification-algorithms-for-covid19-detection-using-cough-acoustic-signals-2201.04872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-classification-algorithms-for-covid19-detection-using-cough-acoustic-signals-2201.04872"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-transducer-based-end-to-end-asr-with-espnet-architecture-auxiliary-loss-and-decoding-strategies-2201.05420</loc><lastmod>2022-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-transducer-based-end-to-end-asr-with-espnet-architecture-auxiliary-loss-and-decoding-strategies-2201.05420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-transducer-based-end-to-end-asr-with-espnet-architecture-auxiliary-loss-and-decoding-strategies-2201.05420"/></url>
<url><loc>https://scifaro.com/en/abs/kazakhtts2-extending-the-open-source-kazakh-tts-corpus-with-more-data-speakers-and-topics-2201.05771</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kazakhtts2-extending-the-open-source-kazakh-tts-corpus-with-more-data-speakers-and-topics-2201.05771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kazakhtts2-extending-the-open-source-kazakh-tts-corpus-with-more-data-speakers-and-topics-2201.05771"/></url>
<url><loc>https://scifaro.com/en/abs/recent-progress-in-the-cuhk-dysarthric-speech-recognition-system-2201.05845</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-progress-in-the-cuhk-dysarthric-speech-recognition-system-2201.05845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-progress-in-the-cuhk-dysarthric-speech-recognition-system-2201.05845"/></url>
<url><loc>https://scifaro.com/en/abs/common-phone-a-multilingual-dataset-for-robust-acoustic-modelling-2201.05912</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/common-phone-a-multilingual-dataset-for-robust-acoustic-modelling-2201.05912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/common-phone-a-multilingual-dataset-for-robust-acoustic-modelling-2201.05912"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-covid-19-prediction-performances-of-normalization-methods-on-cough-acoustics-sounds-2201.06078</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-covid-19-prediction-performances-of-normalization-methods-on-cough-acoustics-sounds-2201.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-covid-19-prediction-performances-of-normalization-methods-on-cough-acoustics-sounds-2201.06078"/></url>
<url><loc>https://scifaro.com/en/abs/how-bad-are-artifacts-analyzing-the-impact-of-speech-enhancement-errors-on-asr-2201.06685</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-bad-are-artifacts-analyzing-the-impact-of-speech-enhancement-errors-on-asr-2201.06685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-bad-are-artifacts-analyzing-the-impact-of-speech-enhancement-errors-on-asr-2201.06685"/></url>
<url><loc>https://scifaro.com/en/abs/human-and-automatic-speech-recognition-performance-on-german-oral-history-interviews-2201.06841</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/human-and-automatic-speech-recognition-performance-on-german-oral-history-interviews-2201.06841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/human-and-automatic-speech-recognition-performance-on-german-oral-history-interviews-2201.06841"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-the-ambiguity-in-human-annotation-of-german-oral-history-interviews-for-perceived-emotion-recognition-and-sentiment-analysis-2201.06868</loc><lastmod>2022-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-the-ambiguity-in-human-annotation-of-german-oral-history-interviews-for-perceived-emotion-recognition-and-sentiment-analysis-2201.06868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-the-ambiguity-in-human-annotation-of-german-oral-history-interviews-for-perceived-emotion-recognition-and-sentiment-analysis-2201.06868"/></url>
<url><loc>https://scifaro.com/en/abs/a-noise-robust-self-supervised-pre-training-model-based-speech-representation-learning-for-automatic-speech-recognition-2201.08930</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-noise-robust-self-supervised-pre-training-model-based-speech-representation-learning-for-automatic-speech-recognition-2201.08930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-noise-robust-self-supervised-pre-training-model-based-speech-representation-learning-for-automatic-speech-recognition-2201.08930"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-and-self-supervised-pretraining-based-covid-19-detection-using-acoustic-breathing-cough-speech-signals-2201.08934</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-and-self-supervised-pretraining-based-covid-19-detection-using-acoustic-breathing-cough-speech-signals-2201.08934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-and-self-supervised-pretraining-based-covid-19-detection-using-acoustic-breathing-cough-speech-signals-2201.08934"/></url>
<url><loc>https://scifaro.com/en/abs/variational-auto-encoder-based-variability-encoding-for-dysarthric-speech-recognition-2201.09422</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-auto-encoder-based-variability-encoding-for-dysarthric-speech-recognition-2201.09422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-auto-encoder-based-variability-encoding-for-dysarthric-speech-recognition-2201.09422"/></url>
<url><loc>https://scifaro.com/en/abs/polyphone-disambiguation-and-accent-prediction-using-pre-trained-language-models-in-japanese-tts-front-end-2201.09427</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphone-disambiguation-and-accent-prediction-using-pre-trained-language-models-in-japanese-tts-front-end-2201.09427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphone-disambiguation-and-accent-prediction-using-pre-trained-language-models-in-japanese-tts-front-end-2201.09427"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-deep-neural-network-acoustic-modelling-approaches-for-low-resource-accented-mandarin-speech-recognition-2201.09432</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-deep-neural-network-acoustic-modelling-approaches-for-low-resource-accented-mandarin-speech-recognition-2201.09432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-deep-neural-network-acoustic-modelling-approaches-for-low-resource-accented-mandarin-speech-recognition-2201.09432"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-speech-detection-using-meta-learning-with-prototypical-loss-2201.09470</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-speech-detection-using-meta-learning-with-prototypical-loss-2201.09470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-speech-detection-using-meta-learning-with-prototypical-loss-2201.09470"/></url>
<url><loc>https://scifaro.com/en/abs/data-and-knowledge-driven-approaches-for-multilingual-training-to-improve-the-performance-of-speech-recognition-systems-of-indian-languages-2201.09494</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-and-knowledge-driven-approaches-for-multilingual-training-to-improve-the-performance-of-speech-recognition-systems-of-indian-languages-2201.09494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-and-knowledge-driven-approaches-for-multilingual-training-to-improve-the-performance-of-speech-recognition-systems-of-indian-languages-2201.09494"/></url>
<url><loc>https://scifaro.com/en/abs/picknet-real-time-channel-selection-for-ad-hoc-microphone-arrays-2201.09586</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/picknet-real-time-channel-selection-for-ad-hoc-microphone-arrays-2201.09586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/picknet-real-time-channel-selection-for-ad-hoc-microphone-arrays-2201.09586"/></url>
<url><loc>https://scifaro.com/en/abs/a-bayesian-permutation-training-deep-representation-learning-method-for-speech-enhancement-with-variational-autoencoder-2201.09875</loc><lastmod>2022-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bayesian-permutation-training-deep-representation-learning-method-for-speech-enhancement-with-variational-autoencoder-2201.09875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bayesian-permutation-training-deep-representation-learning-method-for-speech-enhancement-with-variational-autoencoder-2201.09875"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-temporal-attentive-pooling-based-convolutional-recurrent-architecture-for-acoustic-signal-enhancement-2201.09913</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-temporal-attentive-pooling-based-convolutional-recurrent-architecture-for-acoustic-signal-enhancement-2201.09913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-temporal-attentive-pooling-based-convolutional-recurrent-architecture-for-acoustic-signal-enhancement-2201.09913"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-utility-estimation-in-acoustic-sensor-networks-using-single-channel-signal-features-2201.09946</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-utility-estimation-in-acoustic-sensor-networks-using-single-channel-signal-features-2201.09946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-utility-estimation-in-acoustic-sensor-networks-using-single-channel-signal-features-2201.09946"/></url>
<url><loc>https://scifaro.com/en/abs/endpoint-detection-for-streaming-end-to-end-multi-talker-asr-2201.09979</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/endpoint-detection-for-streaming-end-to-end-multi-talker-asr-2201.09979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/endpoint-detection-for-streaming-end-to-end-multi-talker-asr-2201.09979"/></url>
<url><loc>https://scifaro.com/en/abs/improving-non-autoregressive-end-to-end-speech-recognition-with-pre-trained-acoustic-and-language-models-2201.10103</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-non-autoregressive-end-to-end-speech-recognition-with-pre-trained-acoustic-and-language-models-2201.10103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-non-autoregressive-end-to-end-speech-recognition-with-pre-trained-acoustic-and-language-models-2201.10103"/></url>
<url><loc>https://scifaro.com/en/abs/prediction-of-neonatal-respiratory-distress-in-term-babies-at-birth-from-digital-stethoscope-recorded-chest-sounds-2201.10105</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prediction-of-neonatal-respiratory-distress-in-term-babies-at-birth-from-digital-stethoscope-recorded-chest-sounds-2201.10105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prediction-of-neonatal-respiratory-distress-in-term-babies-at-birth-from-digital-stethoscope-recorded-chest-sounds-2201.10105"/></url>
<url><loc>https://scifaro.com/en/abs/run-and-back-stitch-search-novel-block-synchronous-decoding-for-streaming-encoder-decoder-asr-2201.10190</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/run-and-back-stitch-search-novel-block-synchronous-decoding-for-streaming-encoder-decoder-asr-2201.10190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/run-and-back-stitch-search-novel-block-synchronous-decoding-for-streaming-encoder-decoder-asr-2201.10190"/></url>
<url><loc>https://scifaro.com/en/abs/spiral-self-supervised-perturbation-invariant-representation-learning-for-speech-pre-training-2201.10207</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spiral-self-supervised-perturbation-invariant-representation-learning-for-speech-pre-training-2201.10207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spiral-self-supervised-perturbation-invariant-representation-learning-for-speech-pre-training-2201.10207"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-fusion-of-acoustic-and-text-representations-in-rnn-t-2201.10240</loc><lastmod>2022-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-fusion-of-acoustic-and-text-representations-in-rnn-t-2201.10240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-fusion-of-acoustic-and-text-representations-in-rnn-t-2201.10240"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-long-form-voice-cloning-with-dynamic-convolution-attention-2201.10375</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-long-form-voice-cloning-with-dynamic-convolution-attention-2201.10375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-long-form-voice-cloning-with-dynamic-convolution-attention-2201.10375"/></url>
<url><loc>https://scifaro.com/en/abs/invertible-voice-conversion-2201.10687</loc><lastmod>2022-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/invertible-voice-conversion-2201.10687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/invertible-voice-conversion-2201.10687"/></url>
<url><loc>https://scifaro.com/en/abs/skim-skipping-memory-lstm-for-low-latency-real-time-continuous-speech-separation-2201.10800</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skim-skipping-memory-lstm-for-low-latency-real-time-continuous-speech-separation-2201.10800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skim-skipping-memory-lstm-for-low-latency-real-time-continuous-speech-separation-2201.10800"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-step-backward-compatible-fullband-speech-enhancement-system-2201.10809</loc><lastmod>2022-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-step-backward-compatible-fullband-speech-enhancement-system-2201.10809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-step-backward-compatible-fullband-speech-enhancement-system-2201.10809"/></url>
<url><loc>https://scifaro.com/en/abs/deep-recurrent-learning-for-heart-sounds-segmentation-based-on-instantaneous-frequency-features-2201.11320</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-recurrent-learning-for-heart-sounds-segmentation-based-on-instantaneous-frequency-features-2201.11320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-recurrent-learning-for-heart-sounds-segmentation-based-on-instantaneous-frequency-features-2201.11320"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizing-dysarthric-speech-using-multi-talker-tts-for-dysarthric-speech-recognition-2201.11571</loc><lastmod>2022-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizing-dysarthric-speech-using-multi-talker-tts-for-dysarthric-speech-recognition-2201.11571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizing-dysarthric-speech-using-multi-talker-tts-for-dysarthric-speech-recognition-2201.11571"/></url>
<url><loc>https://scifaro.com/en/abs/internal-language-model-estimation-through-explicit-context-vector-learning-for-attention-based-encoder-decoder-asr-2201.11627</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/internal-language-model-estimation-through-explicit-context-vector-learning-for-attention-based-encoder-decoder-asr-2201.11627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/internal-language-model-estimation-through-explicit-context-vector-learning-for-attention-based-encoder-decoder-asr-2201.11627"/></url>
<url><loc>https://scifaro.com/en/abs/diffgan-tts-high-fidelity-and-efficient-text-to-speech-with-denoising-diffusion-gans-2201.11972</loc><lastmod>2022-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffgan-tts-high-fidelity-and-efficient-text-to-speech-with-denoising-diffusion-gans-2201.11972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffgan-tts-high-fidelity-and-efficient-text-to-speech-with-denoising-diffusion-gans-2201.11972"/></url>
<url><loc>https://scifaro.com/en/abs/a-dnn-based-post-filter-to-enhance-the-quality-of-coded-speech-in-mdct-domain-2201.12039</loc><lastmod>2025-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dnn-based-post-filter-to-enhance-the-quality-of-coded-speech-in-mdct-domain-2201.12039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dnn-based-post-filter-to-enhance-the-quality-of-coded-speech-in-mdct-domain-2201.12039"/></url>
<url><loc>https://scifaro.com/en/abs/polyphonic-audio-event-detection-multi-label-or-multi-class-multi-task-classification-problem-2201.12557</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyphonic-audio-event-detection-multi-label-or-multi-class-multi-task-classification-problem-2201.12557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyphonic-audio-event-detection-multi-label-or-multi-class-multi-task-classification-problem-2201.12557"/></url>
<url><loc>https://scifaro.com/en/abs/hgcn-harmonic-gated-compensation-network-for-speech-enhancement-2201.12755</loc><lastmod>2022-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hgcn-harmonic-gated-compensation-network-for-speech-enhancement-2201.12755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hgcn-harmonic-gated-compensation-network-for-speech-enhancement-2201.12755"/></url>
<url><loc>https://scifaro.com/en/abs/postgan-a-gan-based-post-processor-to-enhance-the-quality-of-coded-speech-2201.13093</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/postgan-a-gan-based-post-processor-to-enhance-the-quality-of-coded-speech-2201.13093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/postgan-a-gan-based-post-processor-to-enhance-the-quality-of-coded-speech-2201.13093"/></url>
<url><loc>https://scifaro.com/en/abs/threshold-independent-evaluation-of-sound-event-detection-scores-2201.13148</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/threshold-independent-evaluation-of-sound-event-detection-scores-2201.13148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/threshold-independent-evaluation-of-sound-event-detection-scores-2201.13148"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-naturalistic-field-acoustic-environments-on-forensic-text-independent-speaker-verification-system-2201.13246</loc><lastmod>2022-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-naturalistic-field-acoustic-environments-on-forensic-text-independent-speaker-verification-system-2201.13246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-naturalistic-field-acoustic-environments-on-forensic-text-independent-speaker-verification-system-2201.13246"/></url>
<url><loc>https://scifaro.com/en/abs/bea-base-a-benchmark-for-asr-of-spontaneous-hungarian-2202.00601</loc><lastmod>2022-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bea-base-a-benchmark-for-asr-of-spontaneous-hungarian-2202.00601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bea-base-a-benchmark-for-asr-of-spontaneous-hungarian-2202.00601"/></url>
<url><loc>https://scifaro.com/en/abs/new-insights-on-target-speaker-extraction-2202.00733</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/new-insights-on-target-speaker-extraction-2202.00733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/new-insights-on-target-speaker-extraction-2202.00733"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-multi-talker-asr-with-token-level-serialized-output-training-2202.00842</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-multi-talker-asr-with-token-level-serialized-output-training-2202.00842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-multi-talker-asr-with-token-level-serialized-output-training-2202.00842"/></url>
<url><loc>https://scifaro.com/en/abs/tonet-tone-octave-network-for-singing-melody-extraction-from-polyphonic-music-2202.00951</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tonet-tone-octave-network-for-singing-melody-extraction-from-polyphonic-music-2202.00951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tonet-tone-octave-network-for-singing-melody-extraction-from-polyphonic-music-2202.00951"/></url>
<url><loc>https://scifaro.com/en/abs/the-coral-algorithm-for-unsupervised-domain-adaptation-of-speaker-recogntion-2202.01092</loc><lastmod>2022-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-coral-algorithm-for-unsupervised-domain-adaptation-of-speaker-recogntion-2202.01092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-coral-algorithm-for-unsupervised-domain-adaptation-of-speaker-recogntion-2202.01092"/></url>
<url><loc>https://scifaro.com/en/abs/rescorebert-discriminative-speech-recognition-rescoring-with-bert-2202.01094</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rescorebert-discriminative-speech-recognition-rescoring-with-bert-2202.01094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rescorebert-discriminative-speech-recognition-rescoring-with-bert-2202.01094"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speech-recognition-and-audio-captioning-2202.01405</loc><lastmod>2022-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speech-recognition-and-audio-captioning-2202.01405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speech-recognition-and-audio-captioning-2202.01405"/></url>
<url><loc>https://scifaro.com/en/abs/a-benchmark-of-state-of-the-art-sound-event-detection-systems-evaluated-on-synthetic-soundscapes-2202.01487</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-benchmark-of-state-of-the-art-sound-event-detection-systems-evaluated-on-synthetic-soundscapes-2202.01487"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-benchmark-of-state-of-the-art-sound-event-detection-systems-evaluated-on-synthetic-soundscapes-2202.01487"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-complex-multi-frame-filtering-network-for-stereophonic-acoustic-echo-cancellation-2202.01630</loc><lastmod>2022-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-complex-multi-frame-filtering-network-for-stereophonic-acoustic-echo-cancellation-2202.01630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-complex-multi-frame-filtering-network-for-stereophonic-acoustic-echo-cancellation-2202.01630"/></url>
<url><loc>https://scifaro.com/en/abs/distortion-audio-effects-learning-how-to-recover-the-clean-signal-2202.01664</loc><lastmod>2022-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distortion-audio-effects-learning-how-to-recover-the-clean-signal-2202.01664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distortion-audio-effects-learning-how-to-recover-the-clean-signal-2202.01664"/></url>
<url><loc>https://scifaro.com/en/abs/the-cuhk-tencent-speaker-diarization-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.01986</loc><lastmod>2022-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-cuhk-tencent-speaker-diarization-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.01986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-cuhk-tencent-speaker-diarization-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-challenge-2202.01986"/></url>
<url><loc>https://scifaro.com/en/abs/cross-channel-attention-based-target-speaker-voice-activity-detection-experimental-results-for-m2met-challenge-2202.02687</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-channel-attention-based-target-speaker-voice-activity-detection-experimental-results-for-m2met-challenge-2202.02687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-channel-attention-based-target-speaker-voice-activity-detection-experimental-results-for-m2met-challenge-2202.02687"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-self-attention-mechanisms-for-speech-separation-2202.02884</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-self-attention-mechanisms-for-speech-separation-2202.02884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-self-attention-mechanisms-for-speech-separation-2202.02884"/></url>
<url><loc>https://scifaro.com/en/abs/building-synthetic-speaker-profiles-in-text-to-speech-systems-2202.03125</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-synthetic-speaker-profiles-in-text-to-speech-systems-2202.03125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-synthetic-speaker-profiles-in-text-to-speech-systems-2202.03125"/></url>
<url><loc>https://scifaro.com/en/abs/t-nga-temporal-network-grafting-algorithm-for-learning-to-process-spiking-audio-sensor-events-2202.03204</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-nga-temporal-network-grafting-algorithm-for-learning-to-process-spiking-audio-sensor-events-2202.03204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-nga-temporal-network-grafting-algorithm-for-learning-to-process-spiking-audio-sensor-events-2202.03204"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-aware-speech-to-text-transmission-with-redundancy-removal-2202.03211</loc><lastmod>2022-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-aware-speech-to-text-transmission-with-redundancy-removal-2202.03211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-aware-speech-to-text-transmission-with-redundancy-removal-2202.03211"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-representation-learning-for-speech-using-visual-grounding-and-masked-language-modeling-2202.03543</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-representation-learning-for-speech-using-visual-grounding-and-masked-language-modeling-2202.03543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-representation-learning-for-speech-using-visual-grounding-and-masked-language-modeling-2202.03543"/></url>
<url><loc>https://scifaro.com/en/abs/calm-contrastive-aligned-audio-language-multirate-and-multimodal-representations-2202.03587</loc><lastmod>2022-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/calm-contrastive-aligned-audio-language-multirate-and-multimodal-representations-2202.03587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/calm-contrastive-aligned-audio-language-multirate-and-multimodal-representations-2202.03587"/></url>
<url><loc>https://scifaro.com/en/abs/infergrad-improving-diffusion-models-for-vocoder-by-considering-inference-in-training-2202.03751</loc><lastmod>2022-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infergrad-improving-diffusion-models-for-vocoder-by-considering-inference-in-training-2202.03751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infergrad-improving-diffusion-models-for-vocoder-by-considering-inference-in-training-2202.03751"/></url>
<url><loc>https://scifaro.com/en/abs/mixcycle-unsupervised-speech-separation-via-cyclic-mixture-permutation-invariant-training-2202.03875</loc><lastmod>2023-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixcycle-unsupervised-speech-separation-via-cyclic-mixture-permutation-invariant-training-2202.03875"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixcycle-unsupervised-speech-separation-via-cyclic-mixture-permutation-invariant-training-2202.03875"/></url>
<url><loc>https://scifaro.com/en/abs/time-varying-harmonic-models-for-voice-signal-analysis-2202.04150</loc><lastmod>2022-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-varying-harmonic-models-for-voice-signal-analysis-2202.04150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-varying-harmonic-models-for-voice-signal-analysis-2202.04150"/></url>
<url><loc>https://scifaro.com/en/abs/a-speech-intelligibility-enhancement-model-based-on-canonical-correlation-and-deep-learning-for-hearing-assistive-technologies-2202.04172</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speech-intelligibility-enhancement-model-based-on-canonical-correlation-and-deep-learning-for-hearing-assistive-technologies-2202.04172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speech-intelligibility-enhancement-model-based-on-canonical-correlation-and-deep-learning-for-hearing-assistive-technologies-2202.04172"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-active-noise-control-based-on-individual-kernel-interpolation-of-primary-and-secondary-sound-fields-2202.04807</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-active-noise-control-based-on-individual-kernel-interpolation-of-primary-and-secondary-sound-fields-2202.04807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-active-noise-control-based-on-individual-kernel-interpolation-of-primary-and-secondary-sound-fields-2202.04807"/></url>
<url><loc>https://scifaro.com/en/abs/the-ustc-ximalaya-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-m2met-challenge-2202.04855</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ustc-ximalaya-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-m2met-challenge-2202.04855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ustc-ximalaya-system-for-the-icassp-2022-multi-channel-multi-party-meeting-transcription-m2met-challenge-2202.04855"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-style-transfer-for-text-to-speech-using-data-augmentation-2202.05083</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-style-transfer-for-text-to-speech-using-data-augmentation-2202.05083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-style-transfer-for-text-to-speech-using-data-augmentation-2202.05083"/></url>
<url><loc>https://scifaro.com/en/abs/a-probabilistic-fusion-framework-for-spoofing-aware-speaker-verification-2202.05253</loc><lastmod>2026-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-probabilistic-fusion-framework-for-spoofing-aware-speaker-verification-2202.05253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-probabilistic-fusion-framework-for-spoofing-aware-speaker-verification-2202.05253"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-diffusion-probabilistic-model-for-speech-enhancement-2202.05256</loc><lastmod>2022-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-diffusion-probabilistic-model-for-speech-enhancement-2202.05256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-diffusion-probabilistic-model-for-speech-enhancement-2202.05256"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-asr-for-stuttered-speech-with-limited-data-using-detect-and-pass-2202.05396</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-asr-for-stuttered-speech-with-limited-data-using-detect-and-pass-2202.05396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-asr-for-stuttered-speech-with-limited-data-using-detect-and-pass-2202.05396"/></url>
<url><loc>https://scifaro.com/en/abs/neural-architecture-search-for-energy-efficient-always-on-audio-models-2202.05397</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-architecture-search-for-energy-efficient-always-on-audio-models-2202.05397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-architecture-search-for-energy-efficient-always-on-audio-models-2202.05397"/></url>
<url><loc>https://scifaro.com/en/abs/the-xmuspeech-system-for-multi-channel-multi-party-meeting-transcription-challenge-2202.05744</loc><lastmod>2022-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-xmuspeech-system-for-multi-channel-multi-party-meeting-transcription-challenge-2202.05744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-xmuspeech-system-for-multi-channel-multi-party-meeting-transcription-challenge-2202.05744"/></url>
<url><loc>https://scifaro.com/en/abs/fraug-a-frame-rate-based-data-augmentation-method-for-depression-detection-from-speech-signals-2202.05912</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fraug-a-frame-rate-based-data-augmentation-method-for-depression-detection-from-speech-signals-2202.05912"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fraug-a-frame-rate-based-data-augmentation-method-for-depression-detection-from-speech-signals-2202.05912"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-depression-classification-using-articulatory-coordination-features-and-hierarchical-attention-based-text-embeddings-2202.06238</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-depression-classification-using-articulatory-coordination-features-and-hierarchical-attention-based-text-embeddings-2202.06238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-depression-classification-using-articulatory-coordination-features-and-hierarchical-attention-based-text-embeddings-2202.06238"/></url>
<url><loc>https://scifaro.com/en/abs/deepchorus-a-hybrid-model-of-multi-scale-convolution-and-self-attention-for-chorus-detection-2202.06338</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepchorus-a-hybrid-model-of-multi-scale-convolution-and-self-attention-for-chorus-detection-2202.06338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepchorus-a-hybrid-model-of-multi-scale-convolution-and-self-attention-for-chorus-detection-2202.06338"/></url>
<url><loc>https://scifaro.com/en/abs/distribution-augmentation-for-low-resource-expressive-text-to-speech-2202.06409</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distribution-augmentation-for-low-resource-expressive-text-to-speech-2202.06409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distribution-augmentation-for-low-resource-expressive-text-to-speech-2202.06409"/></url>
<url><loc>https://scifaro.com/en/abs/emgse-acoustic-emg-fusion-for-multimodal-speech-enhancement-2202.06507</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emgse-acoustic-emg-fusion-for-multimodal-speech-enhancement-2202.06507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emgse-acoustic-emg-fusion-for-multimodal-speech-enhancement-2202.06507"/></url>
<url><loc>https://scifaro.com/en/abs/tight-integration-of-neural-and-clustering-based-diarization-through-deep-unfolding-of-infinite-gaussian-mixture-model-2202.06524</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tight-integration-of-neural-and-clustering-based-diarization-through-deep-unfolding-of-infinite-gaussian-mixture-model-2202.06524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tight-integration-of-neural-and-clustering-based-diarization-through-deep-unfolding-of-infinite-gaussian-mixture-model-2202.06524"/></url>
<url><loc>https://scifaro.com/en/abs/partially-fake-audio-detection-by-self-attention-based-fake-span-discovery-2202.06684</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partially-fake-audio-detection-by-self-attention-based-fake-span-discovery-2202.06684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partially-fake-audio-detection-by-self-attention-based-fake-span-discovery-2202.06684"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-monaural-speech-enhancement-with-deep-filter-bank-equalizer-2202.06764</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-monaural-speech-enhancement-with-deep-filter-bank-equalizer-2202.06764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-monaural-speech-enhancement-with-deep-filter-bank-equalizer-2202.06764"/></url>
<url><loc>https://scifaro.com/en/abs/speech-analysis-for-automatic-mania-assessment-in-bipolar-disorder-2202.06766</loc><lastmod>2022-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-analysis-for-automatic-mania-assessment-in-bipolar-disorder-2202.06766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-analysis-for-automatic-mania-assessment-in-bipolar-disorder-2202.06766"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-word-level-prosody-tagging-for-controllable-speech-synthesis-2202.07200</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-word-level-prosody-tagging-for-controllable-speech-synthesis-2202.07200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-word-level-prosody-tagging-for-controllable-speech-synthesis-2202.07200"/></url>
<url><loc>https://scifaro.com/en/abs/spain-net-spatially-informed-stereophonic-music-source-separation-2202.07523</loc><lastmod>2022-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spain-net-spatially-informed-stereophonic-music-source-separation-2202.07523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spain-net-spatially-informed-stereophonic-music-source-separation-2202.07523"/></url>
<url><loc>https://scifaro.com/en/abs/nonverbal-sound-detection-for-disordered-speech-2202.07750</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonverbal-sound-detection-for-disordered-speech-2202.07750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonverbal-sound-detection-for-disordered-speech-2202.07750"/></url>
<url><loc>https://scifaro.com/en/abs/prosospeech-enhancing-prosody-with-quantized-vector-pre-training-in-text-to-speech-2202.07816</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosospeech-enhancing-prosody-with-quantized-vector-pre-training-in-text-to-speech-2202.07816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosospeech-enhancing-prosody-with-quantized-vector-pre-training-in-text-to-speech-2202.07816"/></url>
<url><loc>https://scifaro.com/en/abs/applade-adjustable-plug-and-play-audio-declipper-combining-dnn-with-sparse-optimization-2202.08028</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applade-adjustable-plug-and-play-audio-declipper-combining-dnn-with-sparse-optimization-2202.08028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applade-adjustable-plug-and-play-audio-declipper-combining-dnn-with-sparse-optimization-2202.08028"/></url>
<url><loc>https://scifaro.com/en/abs/voice-filter-few-shot-text-to-speech-speaker-adaptation-using-voice-conversion-as-a-post-processing-module-2202.08164</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-filter-few-shot-text-to-speech-speaker-adaptation-using-voice-conversion-as-a-post-processing-module-2202.08164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-filter-few-shot-text-to-speech-speaker-adaptation-using-voice-conversion-as-a-post-processing-module-2202.08164"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-depression-detection-an-emotional-audio-textual-corpus-and-a-gru-bilstm-based-model-2202.08210</loc><lastmod>2022-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-depression-detection-an-emotional-audio-textual-corpus-and-a-gru-bilstm-based-model-2202.08210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-depression-detection-an-emotional-audio-textual-corpus-and-a-gru-bilstm-based-model-2202.08210"/></url>
<url><loc>https://scifaro.com/en/abs/mlp-asr-sequence-length-agnostic-all-mlp-architectures-for-speech-recognition-2202.08456</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mlp-asr-sequence-length-agnostic-all-mlp-architectures-for-speech-recognition-2202.08456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mlp-asr-sequence-length-agnostic-all-mlp-architectures-for-speech-recognition-2202.08456"/></url>
<url><loc>https://scifaro.com/en/abs/wearable-seld-dataset-dataset-for-sound-event-localization-and-detection-using-wearable-devices-around-head-2202.08458</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wearable-seld-dataset-dataset-for-sound-event-localization-and-detection-using-wearable-devices-around-head-2202.08458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wearable-seld-dataset-dataset-for-sound-event-localization-and-detection-using-wearable-devices-around-head-2202.08458"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-event-detection-with-classifier-chains-2202.08470</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-event-detection-with-classifier-chains-2202.08470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-event-detection-with-classifier-chains-2202.08470"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-asr-with-self-conditioned-folded-encoders-2202.08474</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-asr-with-self-conditioned-folded-encoders-2202.08474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-asr-with-self-conditioned-folded-encoders-2202.08474"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-music-remastering-system-using-self-supervised-and-adversarial-training-2202.08520</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-music-remastering-system-using-self-supervised-and-adversarial-training-2202.08520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-music-remastering-system-using-self-supervised-and-adversarial-training-2202.08520"/></url>
<url><loc>https://scifaro.com/en/abs/mitigating-closed-model-adversarial-examples-with-bayesian-neural-modeling-for-enhanced-end-to-end-speech-recognition-2202.08532</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mitigating-closed-model-adversarial-examples-with-bayesian-neural-modeling-for-enhanced-end-to-end-speech-recognition-2202.08532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mitigating-closed-model-adversarial-examples-with-bayesian-neural-modeling-for-enhanced-end-to-end-speech-recognition-2202.08532"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-u-net-for-high-fidelity-denoising-of-historical-recordings-2202.08702</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-u-net-for-high-fidelity-denoising-of-historical-recordings-2202.08702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-u-net-for-high-fidelity-denoising-of-historical-recordings-2202.08702"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speech-denoising-for-machine-ears-2202.08793</loc><lastmod>2022-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speech-denoising-for-machine-ears-2202.08793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speech-denoising-for-machine-ears-2202.08793"/></url>
<url><loc>https://scifaro.com/en/abs/curriculum-optimization-for-low-resource-speech-recognition-2202.08883</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/curriculum-optimization-for-low-resource-speech-recognition-2202.08883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/curriculum-optimization-for-low-resource-speech-recognition-2202.08883"/></url>
<url><loc>https://scifaro.com/en/abs/vcvts-multi-speaker-video-to-speech-synthesis-via-cross-modal-knowledge-transfer-from-voice-conversion-2202.09081</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vcvts-multi-speaker-video-to-speech-synthesis-via-cross-modal-knowledge-transfer-from-voice-conversion-2202.09081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vcvts-multi-speaker-video-to-speech-synthesis-via-cross-modal-knowledge-transfer-from-voice-conversion-2202.09081"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identity-preservation-in-dysarthric-speech-reconstruction-by-adversarial-speaker-adaptation-2202.09082</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identity-preservation-in-dysarthric-speech-reconstruction-by-adversarial-speaker-adaptation-2202.09082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identity-preservation-in-dysarthric-speech-reconstruction-by-adversarial-speaker-adaptation-2202.09082"/></url>
<url><loc>https://scifaro.com/en/abs/echo-aware-adaptation-of-sound-event-localization-and-detection-in-unknown-environments-2202.09121</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echo-aware-adaptation-of-sound-event-localization-and-detection-in-unknown-environments-2202.09121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echo-aware-adaptation-of-sound-event-localization-and-detection-in-unknown-environments-2202.09121"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-and-multi-modal-event-detection-utilizing-transformer-based-multi-sensor-fusion-2202.09124</loc><lastmod>2022-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-and-multi-modal-event-detection-utilizing-transformer-based-multi-sensor-fusion-2202.09124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-and-multi-modal-event-detection-utilizing-transformer-based-multi-sensor-fusion-2202.09124"/></url>
<url><loc>https://scifaro.com/en/abs/domain-adaptation-of-low-resource-target-domain-models-using-well-trained-asr-conformer-models-2202.09167</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-adaptation-of-low-resource-target-domain-models-using-well-trained-asr-conformer-models-2202.09167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-adaptation-of-low-resource-target-domain-models-using-well-trained-asr-conformer-models-2202.09167"/></url>
<url><loc>https://scifaro.com/en/abs/lpc-augment-an-lpc-based-asr-data-augmentation-algorithm-for-low-and-zero-resource-children-s-dialects-2202.09529</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lpc-augment-an-lpc-based-asr-data-augmentation-algorithm-for-low-and-zero-resource-children-s-dialects-2202.09529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lpc-augment-an-lpc-based-asr-data-augmentation-algorithm-for-low-and-zero-resource-children-s-dialects-2202.09529"/></url>
<url><loc>https://scifaro.com/en/abs/can-social-robots-effectively-elicit-curiosity-in-stem-topics-from-k-1-students-during-oral-assessments-2202.09531</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-social-robots-effectively-elicit-curiosity-in-stem-topics-from-k-1-students-during-oral-assessments-2202.09531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-social-robots-effectively-elicit-curiosity-in-stem-topics-from-k-1-students-during-oral-assessments-2202.09531"/></url>
<url><loc>https://scifaro.com/en/abs/l-spex-localized-target-speaker-extraction-2202.09995</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l-spex-localized-target-speaker-extraction-2202.09995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l-spex-localized-target-speaker-extraction-2202.09995"/></url>
<url><loc>https://scifaro.com/en/abs/the-pcg-aiid-system-for-l3das22-challenge-mimo-and-miso-convolutional-recurrent-network-for-multi-channel-speech-enhancement-and-speech-recognition-2202.10017</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-pcg-aiid-system-for-l3das22-challenge-mimo-and-miso-convolutional-recurrent-network-for-multi-channel-speech-enhancement-and-speech-recognition-2202.10017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-pcg-aiid-system-for-l3das22-challenge-mimo-and-miso-convolutional-recurrent-network-for-multi-channel-speech-enhancement-and-speech-recognition-2202.10017"/></url>
<url><loc>https://scifaro.com/en/abs/s3t-self-supervised-pre-training-with-swin-transformer-for-music-classification-2202.10139</loc><lastmod>2022-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/s3t-self-supervised-pre-training-with-swin-transformer-for-music-classification-2202.10139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/s3t-self-supervised-pre-training-with-swin-transformer-for-music-classification-2202.10139"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-adaptation-using-spectro-temporal-deep-features-for-dysarthric-and-elderly-speech-recognition-2202.10290</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-adaptation-using-spectro-temporal-deep-features-for-dysarthric-and-elderly-speech-recognition-2202.10290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-adaptation-using-spectro-temporal-deep-features-for-dysarthric-and-elderly-speech-recognition-2202.10290"/></url>
<url><loc>https://scifaro.com/en/abs/l3das22-challenge-learning-3d-audio-sources-in-a-real-office-environment-2202.10372</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l3das22-challenge-learning-3d-audio-sources-in-a-real-office-environment-2202.10372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l3das22-challenge-learning-3d-audio-sources-in-a-real-office-environment-2202.10372"/></url>
<url><loc>https://scifaro.com/en/abs/spanish-and-english-phoneme-recognition-by-training-on-simulated-classroom-audio-recordings-of-collaborative-learning-environments-2202.10536</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spanish-and-english-phoneme-recognition-by-training-on-simulated-classroom-audio-recordings-of-collaborative-learning-environments-2202.10536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spanish-and-english-phoneme-recognition-by-training-on-simulated-classroom-audio-recordings-of-collaborative-learning-environments-2202.10536"/></url>
<url><loc>https://scifaro.com/en/abs/vadoi-voice-activity-detection-overlapping-inference-for-end-to-end-long-form-speech-recognition-2202.10593</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vadoi-voice-activity-detection-overlapping-inference-for-end-to-end-long-form-speech-recognition-2202.10593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vadoi-voice-activity-detection-overlapping-inference-for-end-to-end-long-form-speech-recognition-2202.10593"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-mixup-learning-for-improved-speaker-verification-2202.10672</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-mixup-learning-for-improved-speaker-verification-2202.10672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-mixup-learning-for-improved-speaker-verification-2202.10672"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-speech-for-improved-learning-pathological-voice-disorders-2202.10777</loc><lastmod>2022-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-speech-for-improved-learning-pathological-voice-disorders-2202.10777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-speech-for-improved-learning-pathological-voice-disorders-2202.10777"/></url>
<url><loc>https://scifaro.com/en/abs/wavebender-gan-an-architecture-for-phonetically-meaningful-speech-manipulation-2202.10973</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavebender-gan-an-architecture-for-phonetically-meaningful-speech-manipulation-2202.10973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavebender-gan-an-architecture-for-phonetically-meaningful-speech-manipulation-2202.10973"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-synthesis-on-a-shoestring-improving-the-efficiency-of-lpcnet-2202.11169</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-synthesis-on-a-shoestring-improving-the-efficiency-of-lpcnet-2202.11169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-synthesis-on-a-shoestring-improving-the-efficiency-of-lpcnet-2202.11169"/></url>
<url><loc>https://scifaro.com/en/abs/modal-estimation-on-a-warped-frequency-axis-for-linear-system-modeling-2202.11192</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modal-estimation-on-a-warped-frequency-axis-for-linear-system-modeling-2202.11192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modal-estimation-on-a-warped-frequency-axis-for-linear-system-modeling-2202.11192"/></url>
<url><loc>https://scifaro.com/en/abs/r-g2p-evaluating-and-enhancing-robustness-of-grapheme-to-phoneme-conversion-by-controlled-noise-introducing-and-contextual-information-incorporation-2202.11194</loc><lastmod>2022-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/r-g2p-evaluating-and-enhancing-robustness-of-grapheme-to-phoneme-conversion-by-controlled-noise-introducing-and-contextual-information-incorporation-2202.11194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/r-g2p-evaluating-and-enhancing-robustness-of-grapheme-to-phoneme-conversion-by-controlled-noise-introducing-and-contextual-information-incorporation-2202.11194"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-lpcnet-a-neural-vocoder-with-fully-differentiable-lpc-estimation-2202.11301</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-lpcnet-a-neural-vocoder-with-fully-differentiable-lpc-estimation-2202.11301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-lpcnet-a-neural-vocoder-with-fully-differentiable-lpc-estimation-2202.11301"/></url>
<url><loc>https://scifaro.com/en/abs/improving-fairness-in-speaker-verification-via-group-adapted-fusion-network-2202.11323</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-fairness-in-speaker-verification-via-group-adapted-fusion-network-2202.11323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-fairness-in-speaker-verification-via-group-adapted-fusion-network-2202.11323"/></url>
<url><loc>https://scifaro.com/en/abs/blind-reverberation-time-estimation-in-dynamic-acoustic-conditions-2202.11790</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-reverberation-time-estimation-in-dynamic-acoustic-conditions-2202.11790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-reverberation-time-estimation-in-dynamic-acoustic-conditions-2202.11790"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-temporal-pooling-for-conformer-based-streaming-language-identification-in-long-form-speech-2202.12163</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-temporal-pooling-for-conformer-based-streaming-language-identification-in-long-form-speech-2202.12163"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-temporal-pooling-for-conformer-based-streaming-language-identification-in-long-form-speech-2202.12163"/></url>
<url><loc>https://scifaro.com/en/abs/closing-the-gap-between-single-user-and-multi-user-voicefilter-lite-2202.12169</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/closing-the-gap-between-single-user-and-multi-user-voicefilter-lite-2202.12169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/closing-the-gap-between-single-user-and-multi-user-voicefilter-lite-2202.12169"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speaker-verification-spoofing-and-deepfake-detection-using-wav2vec-2-0-and-data-augmentation-2202.12233</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speaker-verification-spoofing-and-deepfake-detection-using-wav2vec-2-0-and-data-augmentation-2202.12233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speaker-verification-spoofing-and-deepfake-detection-using-wav2vec-2-0-and-data-augmentation-2202.12233"/></url>
<url><loc>https://scifaro.com/en/abs/towards-low-distortion-multi-channel-speech-enhancement-the-espnet-se-submission-to-the-l3das22-challenge-2202.12298</loc><lastmod>2022-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-low-distortion-multi-channel-speech-enhancement-the-espnet-se-submission-to-the-l3das22-challenge-2202.12298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-low-distortion-multi-channel-speech-enhancement-the-espnet-se-submission-to-the-l3das22-challenge-2202.12298"/></url>
<url><loc>https://scifaro.com/en/abs/towards-better-meta-initialization-with-task-augmentation-for-kindergarten-aged-speech-recognition-2202.12326</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-better-meta-initialization-with-task-augmentation-for-kindergarten-aged-speech-recognition-2202.12326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-better-meta-initialization-with-task-augmentation-for-kindergarten-aged-speech-recognition-2202.12326"/></url>
<url><loc>https://scifaro.com/en/abs/openfeat-improving-speaker-identification-by-open-set-few-shot-embedding-adaptation-with-transformer-2202.12349</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/openfeat-improving-speaker-identification-by-open-set-few-shot-embedding-adaptation-with-transformer-2202.12349"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/openfeat-improving-speaker-identification-by-open-set-few-shot-embedding-adaptation-with-transformer-2202.12349"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-gated-compensation-network-plus-for-icassp-2022-dns-challenge-2202.12643</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-gated-compensation-network-plus-for-icassp-2022-dns-challenge-2202.12643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-gated-compensation-network-plus-for-icassp-2022-dns-challenge-2202.12643"/></url>
<url><loc>https://scifaro.com/en/abs/state-of-the-art-in-speaker-recognition-2202.12705</loc><lastmod>2022-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/state-of-the-art-in-speaker-recognition-2202.12705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/state-of-the-art-in-speaker-recognition-2202.12705"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-generative-latent-variable-models-for-speech-2202.12707</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-generative-latent-variable-models-for-speech-2202.12707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-generative-latent-variable-models-for-speech-2202.12707"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-network-for-automatic-assessment-of-dysphonia-2202.12957</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-network-for-automatic-assessment-of-dysphonia-2202.12957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-network-for-automatic-assessment-of-dysphonia-2202.12957"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-over-smoothness-in-text-to-speech-2202.13066</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-over-smoothness-in-text-to-speech-2202.13066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-over-smoothness-in-text-to-speech-2202.13066"/></url>
<url><loc>https://scifaro.com/en/abs/learning-the-beauty-in-songs-neural-singing-voice-beautifier-2202.13277</loc><lastmod>2022-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-the-beauty-in-songs-neural-singing-voice-beautifier-2202.13277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-the-beauty-in-songs-neural-singing-voice-beautifier-2202.13277"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2022-deep-noise-suppression-challenge-2202.13288</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2022-deep-noise-suppression-challenge-2202.13288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2022-deep-noise-suppression-challenge-2202.13288"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2022-acoustic-echo-cancellation-challenge-2202.13290</loc><lastmod>2022-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2022-acoustic-echo-cancellation-challenge-2202.13290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2022-acoustic-echo-cancellation-challenge-2202.13290"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-deepfake-and-spoofing-detection-an-attack-analysis-using-shapley-additive-explanations-2202.13693</loc><lastmod>2022-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-deepfake-and-spoofing-detection-an-attack-analysis-using-shapley-additive-explanations-2202.13693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-deepfake-and-spoofing-detection-an-attack-analysis-using-shapley-additive-explanations-2202.13693"/></url>
<url><loc>https://scifaro.com/en/abs/magnitude-aware-probabilistic-speaker-embeddings-2202.13826</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/magnitude-aware-probabilistic-speaker-embeddings-2202.13826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/magnitude-aware-probabilistic-speaker-embeddings-2202.13826"/></url>
<url><loc>https://scifaro.com/en/abs/trillsson-distilled-universal-paralinguistic-speech-representations-2203.00236</loc><lastmod>2022-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trillsson-distilled-universal-paralinguistic-speech-representations-2203.00236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trillsson-distilled-universal-paralinguistic-speech-representations-2203.00236"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-spectrogram-inversion-on-mobile-phone-2203.00756</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-spectrogram-inversion-on-mobile-phone-2203.00756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-spectrogram-inversion-on-mobile-phone-2203.00756"/></url>
<url><loc>https://scifaro.com/en/abs/muse-svs-multi-singer-emotional-singing-voice-synthesizer-that-controls-emotional-intensity-2203.00931</loc><lastmod>2025-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/muse-svs-multi-singer-emotional-singing-voice-synthesizer-that-controls-emotional-intensity-2203.00931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/muse-svs-multi-singer-emotional-singing-voice-synthesizer-that-controls-emotional-intensity-2203.00931"/></url>
<url><loc>https://scifaro.com/en/abs/the-vicomtech-audio-deepfake-detection-system-based-on-wav2vec2-for-the-2022-add-challenge-2203.01573</loc><lastmod>2022-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-vicomtech-audio-deepfake-detection-system-based-on-wav2vec2-for-the-2022-add-challenge-2203.01573"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-vicomtech-audio-deepfake-detection-system-based-on-wav2vec2-for-the-2022-add-challenge-2203.01573"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-joint-control-of-acoustic-echo-cancellation-beamforming-and-postfiltering-2203.01793</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-joint-control-of-acoustic-echo-cancellation-beamforming-and-postfiltering-2203.01793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-joint-control-of-acoustic-echo-cancellation-beamforming-and-postfiltering-2203.01793"/></url>
<url><loc>https://scifaro.com/en/abs/adpcm-with-nonlinear-prediction-2203.01818</loc><lastmod>2022-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adpcm-with-nonlinear-prediction-2203.01818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adpcm-with-nonlinear-prediction-2203.01818"/></url>
<url><loc>https://scifaro.com/en/abs/speech-segmentation-using-multilevel-hybrid-filters-2203.01819</loc><lastmod>2022-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-segmentation-using-multilevel-hybrid-filters-2203.01819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-segmentation-using-multilevel-hybrid-filters-2203.01819"/></url>
<url><loc>https://scifaro.com/en/abs/improving-non-native-word-level-pronunciation-scoring-with-phone-level-mixup-data-augmentation-and-multi-source-information-2203.01826</loc><lastmod>2022-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-non-native-word-level-pronunciation-scoring-with-phone-level-mixup-data-augmentation-and-multi-source-information-2203.01826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-non-native-word-level-pronunciation-scoring-with-phone-level-mixup-data-augmentation-and-multi-source-information-2203.01826"/></url>
<url><loc>https://scifaro.com/en/abs/a-brief-overview-of-unsupervised-neural-speech-representation-learning-2203.01829</loc><lastmod>2022-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-brief-overview-of-unsupervised-neural-speech-representation-learning-2203.01829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-brief-overview-of-unsupervised-neural-speech-representation-learning-2203.01829"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relevance-of-language-in-speaker-recognition-2203.01992</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relevance-of-language-in-speaker-recognition-2203.01992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relevance-of-language-in-speaker-recognition-2203.01992"/></url>
<url><loc>https://scifaro.com/en/abs/manner-multi-view-attention-network-for-noise-erasure-2203.02181</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manner-multi-view-attention-network-for-noise-erasure-2203.02181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manner-multi-view-attention-network-for-noise-erasure-2203.02181"/></url>
<url><loc>https://scifaro.com/en/abs/selective-pseudo-labeling-and-class-wise-discriminative-fusion-for-sound-event-detection-2203.02191</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selective-pseudo-labeling-and-class-wise-discriminative-fusion-for-sound-event-detection-2203.02191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selective-pseudo-labeling-and-class-wise-discriminative-fusion-for-sound-event-detection-2203.02191"/></url>
<url><loc>https://scifaro.com/en/abs/percepnet-a-phase-and-snr-aware-percepnet-for-real-time-speech-enhancement-2203.02263</loc><lastmod>2022-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/percepnet-a-phase-and-snr-aware-percepnet-for-real-time-speech-enhancement-2203.02263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/percepnet-a-phase-and-snr-aware-percepnet-for-real-time-speech-enhancement-2203.02263"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-statistical-uncertainty-into-neural-network-based-speech-enhancement-2203.02288</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-statistical-uncertainty-into-neural-network-based-speech-enhancement-2203.02288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-statistical-uncertainty-into-neural-network-based-speech-enhancement-2203.02288"/></url>
<url><loc>https://scifaro.com/en/abs/language-vs-speaker-change-a-comparative-study-2203.02680</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-vs-speaker-change-a-comparative-study-2203.02680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-vs-speaker-change-a-comparative-study-2203.02680"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-pre-trained-bert-for-audio-captioning-2203.02838</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-pre-trained-bert-for-audio-captioning-2203.02838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-pre-trained-bert-for-audio-captioning-2203.02838"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-measurement-for-accurate-sound-localization-cues-2203.03166</loc><lastmod>2022-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-measurement-for-accurate-sound-localization-cues-2203.03166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-measurement-for-accurate-sound-localization-cues-2203.03166"/></url>
<url><loc>https://scifaro.com/en/abs/enhance-language-identification-using-dual-mode-model-with-knowledge-distillation-2203.03218</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhance-language-identification-using-dual-mode-model-with-knowledge-distillation-2203.03218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhance-language-identification-using-dual-mode-model-with-knowledge-distillation-2203.03218"/></url>
<url><loc>https://scifaro.com/en/abs/visually-supervised-speaker-detection-and-localization-via-microphone-array-2203.03291</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-supervised-speaker-detection-and-localization-via-microphone-array-2203.03291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-supervised-speaker-detection-and-localization-via-microphone-array-2203.03291"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-decision-forest-for-acoustic-scene-classification-2203.03436</loc><lastmod>2022-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-decision-forest-for-acoustic-scene-classification-2203.03436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-decision-forest-for-acoustic-scene-classification-2203.03436"/></url>
<url><loc>https://scifaro.com/en/abs/locate-this-not-that-class-conditioned-sound-event-doa-estimation-2203.04197</loc><lastmod>2022-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locate-this-not-that-class-conditioned-sound-event-doa-estimation-2203.04197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locate-this-not-that-class-conditioned-sound-event-doa-estimation-2203.04197"/></url>
<url><loc>https://scifaro.com/en/abs/practical-cognitive-speech-compression-2203.04415</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/practical-cognitive-speech-compression-2203.04415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/practical-cognitive-speech-compression-2203.04415"/></url>
<url><loc>https://scifaro.com/en/abs/harmonicity-plays-a-critical-role-in-dnn-based-versus-in-biologically-inspired-monaural-speech-segregation-systems-2203.04420</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonicity-plays-a-critical-role-in-dnn-based-versus-in-biologically-inspired-monaural-speech-segregation-systems-2203.04420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonicity-plays-a-critical-role-in-dnn-based-versus-in-biologically-inspired-monaural-speech-segregation-systems-2203.04420"/></url>
<url><loc>https://scifaro.com/en/abs/language-adaptive-cross-lingual-speech-representation-learning-with-sparse-sharing-sub-networks-2203.04583</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-adaptive-cross-lingual-speech-representation-learning-with-sparse-sharing-sub-networks-2203.04583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-adaptive-cross-lingual-speech-representation-learning-with-sparse-sharing-sub-networks-2203.04583"/></url>
<url><loc>https://scifaro.com/en/abs/a-practical-framework-for-multi-domain-speech-recognition-and-an-instance-sampling-method-to-neural-language-modeling-2203.04767</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-practical-framework-for-multi-domain-speech-recognition-and-an-instance-sampling-method-to-neural-language-modeling-2203.04767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-practical-framework-for-multi-domain-speech-recognition-and-an-instance-sampling-method-to-neural-language-modeling-2203.04767"/></url>
<url><loc>https://scifaro.com/en/abs/beach-to-bitch-inadvertent-unsafe-transcription-of-kids-content-on-youtube-2203.04837</loc><lastmod>2022-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beach-to-bitch-inadvertent-unsafe-transcription-of-kids-content-on-youtube-2203.04837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beach-to-bitch-inadvertent-unsafe-transcription-of-kids-content-on-youtube-2203.04837"/></url>
<url><loc>https://scifaro.com/en/abs/echo-enabled-direction-of-arrival-and-range-estimation-of-a-mobile-source-in-ambisonic-domain-2203.05265</loc><lastmod>2022-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/echo-enabled-direction-of-arrival-and-range-estimation-of-a-mobile-source-in-ambisonic-domain-2203.05265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/echo-enabled-direction-of-arrival-and-range-estimation-of-a-mobile-source-in-ambisonic-domain-2203.05265"/></url>
<url><loc>https://scifaro.com/en/abs/ksof-the-kassel-state-of-fluency-dataset-a-therapy-centered-dataset-of-stuttering-2203.05383</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ksof-the-kassel-state-of-fluency-dataset-a-therapy-centered-dataset-of-stuttering-2203.05383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ksof-the-kassel-state-of-fluency-dataset-a-therapy-centered-dataset-of-stuttering-2203.05383"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-streaming-asr-with-cumulative-attention-2203.05736</loc><lastmod>2022-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-streaming-asr-with-cumulative-attention-2203.05736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-streaming-asr-with-cumulative-attention-2203.05736"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-using-multi-resolution-spectro-temporal-representations-of-speech-signals-2203.05780</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-using-multi-resolution-spectro-temporal-representations-of-speech-signals-2203.05780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-using-multi-resolution-spectro-temporal-representations-of-speech-signals-2203.05780"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-modification-based-data-augmentation-for-improving-end-to-end-asr-for-children-s-speech-2203.06600</loc><lastmod>2022-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-modification-based-data-augmentation-for-improving-end-to-end-asr-for-children-s-speech-2203.06600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-modification-based-data-augmentation-for-improving-end-to-end-asr-for-children-s-speech-2203.06600"/></url>
<url><loc>https://scifaro.com/en/abs/dawn-of-the-transformer-era-in-speech-emotion-recognition-closing-the-valence-gap-2203.07378</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dawn-of-the-transformer-era-in-speech-emotion-recognition-closing-the-valence-gap-2203.07378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dawn-of-the-transformer-era-in-speech-emotion-recognition-closing-the-valence-gap-2203.07378"/></url>
<url><loc>https://scifaro.com/en/abs/fb-mstcn-a-full-band-single-channel-speech-enhancement-method-based-on-multi-scale-temporal-convolutional-network-2203.07684</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fb-mstcn-a-full-band-single-channel-speech-enhancement-method-based-on-multi-scale-temporal-convolutional-network-2203.07684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fb-mstcn-a-full-band-single-channel-speech-enhancement-method-based-on-multi-scale-temporal-convolutional-network-2203.07684"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-self-supervised-learning-for-speech-enhancement-and-separation-2203.07960</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-self-supervised-learning-for-speech-enhancement-and-separation-2203.07960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-self-supervised-learning-for-speech-enhancement-and-separation-2203.07960"/></url>
<url><loc>https://scifaro.com/en/abs/text-free-non-parallel-many-to-many-voice-conversion-using-normalising-flows-2203.08009</loc><lastmod>2022-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-free-non-parallel-many-to-many-voice-conversion-using-normalising-flows-2203.08009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-free-non-parallel-many-to-many-voice-conversion-using-normalising-flows-2203.08009"/></url>
<url><loc>https://scifaro.com/en/abs/a-squeeze-and-excitation-and-transformer-based-cross-task-system-for-environmental-sound-recognition-2203.08350</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-squeeze-and-excitation-and-transformer-based-cross-task-system-for-environmental-sound-recognition-2203.08350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-squeeze-and-excitation-and-transformer-based-cross-task-system-for-environmental-sound-recognition-2203.08350"/></url>
<url><loc>https://scifaro.com/en/abs/pushing-the-limits-of-raw-waveform-speaker-recognition-2203.08488</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pushing-the-limits-of-raw-waveform-speaker-recognition-2203.08488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pushing-the-limits-of-raw-waveform-speaker-recognition-2203.08488"/></url>
<url><loc>https://scifaro.com/en/abs/semi-fedser-semi-supervised-learning-for-speech-emotion-recognition-on-federated-learning-using-multiview-pseudo-labeling-2203.08810</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-fedser-semi-supervised-learning-for-speech-emotion-recognition-on-federated-learning-using-multiview-pseudo-labeling-2203.08810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-fedser-semi-supervised-learning-for-speech-emotion-recognition-on-federated-learning-using-multiview-pseudo-labeling-2203.08810"/></url>
<url><loc>https://scifaro.com/en/abs/to-train-or-not-to-train-adversarially-a-study-of-bias-mitigation-strategies-for-speaker-recognition-2203.09122</loc><lastmod>2022-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-train-or-not-to-train-adversarially-a-study-of-bias-mitigation-strategies-for-speaker-recognition-2203.09122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-train-or-not-to-train-adversarially-a-study-of-bias-mitigation-strategies-for-speaker-recognition-2203.09122"/></url>
<url><loc>https://scifaro.com/en/abs/feature-informed-latent-space-regularization-for-music-source-separation-2203.09132</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-informed-latent-space-regularization-for-music-source-separation-2203.09132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-informed-latent-space-regularization-for-music-source-separation-2203.09132"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-preserving-speech-representation-learning-using-vector-quantization-2203.09518</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-preserving-speech-representation-learning-using-vector-quantization-2203.09518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-preserving-speech-representation-learning-using-vector-quantization-2203.09518"/></url>
<url><loc>https://scifaro.com/en/abs/a-3-t-alignment-aware-acoustic-and-text-pretraining-for-speech-synthesis-and-editing-2203.09690</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-3-t-alignment-aware-acoustic-and-text-pretraining-for-speech-synthesis-and-editing-2203.09690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-3-t-alignment-aware-acoustic-and-text-pretraining-for-speech-synthesis-and-editing-2203.09690"/></url>
<url><loc>https://scifaro.com/en/abs/soft-smoothness-for-audio-inpainting-using-a-latent-matrix-model-in-delay-embedded-space-2203.09746</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-smoothness-for-audio-inpainting-using-a-latent-matrix-model-in-delay-embedded-space-2203.09746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-smoothness-for-audio-inpainting-using-a-latent-matrix-model-in-delay-embedded-space-2203.09746"/></url>
<url><loc>https://scifaro.com/en/abs/consonant-vowel-transition-models-based-on-deep-learning-for-objective-evaluation-of-articulation-2203.10054</loc><lastmod>2022-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consonant-vowel-transition-models-based-on-deep-learning-for-objective-evaluation-of-articulation-2203.10054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consonant-vowel-transition-models-based-on-deep-learning-for-objective-evaluation-of-articulation-2203.10054"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-cross-domain-acoustic-to-articulatory-inverted-features-for-disordered-speech-recognition-2203.10274</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-cross-domain-acoustic-to-articulatory-inverted-features-for-disordered-speech-recognition-2203.10274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-cross-domain-acoustic-to-articulatory-inverted-features-for-disordered-speech-recognition-2203.10274"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-speaker-verification-embedding-extractors-and-back-ends-under-language-and-channel-mismatch-2203.10300</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-speaker-verification-embedding-extractors-and-back-ends-under-language-and-channel-mismatch-2203.10300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-speaker-verification-embedding-extractors-and-back-ends-under-language-and-channel-mismatch-2203.10300"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-effort-modeling-in-neural-tts-for-improving-the-intelligibility-of-synthetic-speech-in-noise-2203.10637</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-effort-modeling-in-neural-tts-for-improving-the-intelligibility-of-synthetic-speech-in-noise-2203.10637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-effort-modeling-in-neural-tts-for-improving-the-intelligibility-of-synthetic-speech-in-noise-2203.10637"/></url>
<url><loc>https://scifaro.com/en/abs/separating-content-from-speaker-identity-in-speech-for-the-assessment-of-cognitive-impairments-2203.10827</loc><lastmod>2022-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separating-content-from-speaker-identity-in-speech-for-the-assessment-of-cognitive-impairments-2203.10827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separating-content-from-speaker-identity-in-speech-for-the-assessment-of-cognitive-impairments-2203.10827"/></url>
<url><loc>https://scifaro.com/en/abs/joint-noise-reduction-and-listening-enhancement-for-full-end-speech-enhancement-2203.11500</loc><lastmod>2022-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-noise-reduction-and-listening-enhancement-for-full-end-speech-enhancement-2203.11500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-noise-reduction-and-listening-enhancement-for-full-end-speech-enhancement-2203.11500"/></url>
<url><loc>https://scifaro.com/en/abs/upmixing-via-style-transfer-a-variational-autoencoder-for-disentangling-spatial-images-and-musical-content-2203.12053</loc><lastmod>2022-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/upmixing-via-style-transfer-a-variational-autoencoder-for-disentangling-spatial-images-and-musical-content-2203.12053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/upmixing-via-style-transfer-a-variational-autoencoder-for-disentangling-spatial-images-and-musical-content-2203.12053"/></url>
<url><loc>https://scifaro.com/en/abs/pho-lid-a-unified-model-incorporating-acoustic-phonetic-and-phonotactic-information-for-language-identification-2203.12366</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pho-lid-a-unified-model-incorporating-acoustic-phonetic-and-phonotactic-information-for-language-identification-2203.12366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pho-lid-a-unified-model-incorporating-acoustic-phonetic-and-phonotactic-information-for-language-identification-2203.12366"/></url>
<url><loc>https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-evaluation-plan-2203.12468</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-evaluation-plan-2203.12468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-evaluation-plan-2203.12468"/></url>
<url><loc>https://scifaro.com/en/abs/a-scalable-model-specialization-framework-for-training-and-inference-using-submodels-and-its-application-to-speech-model-personalization-2203.12559</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-scalable-model-specialization-framework-for-training-and-inference-using-submodels-and-its-application-to-speech-model-personalization-2203.12559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-scalable-model-specialization-framework-for-training-and-inference-using-submodels-and-its-application-to-speech-model-personalization-2203.12559"/></url>
<url><loc>https://scifaro.com/en/abs/characterizing-therapist-s-speaking-style-in-relation-to-empathy-in-psychotherapy-2203.13127</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/characterizing-therapist-s-speaking-style-in-relation-to-empathy-in-psychotherapy-2203.13127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/characterizing-therapist-s-speaking-style-in-relation-to-empathy-in-psychotherapy-2203.13127"/></url>
<url><loc>https://scifaro.com/en/abs/computing-optimal-location-of-microphone-for-improved-speech-recognition-2203.13259</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computing-optimal-location-of-microphone-for-improved-speech-recognition-2203.13259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computing-optimal-location-of-microphone-for-improved-speech-recognition-2203.13259"/></url>
<url><loc>https://scifaro.com/en/abs/pseudo-label-transfer-from-frame-level-to-note-level-in-a-teacher-student-framework-for-singing-transcription-from-polyphonic-music-2203.13422</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseudo-label-transfer-from-frame-level-to-note-level-in-a-teacher-student-framework-for-singing-transcription-from-polyphonic-music-2203.13422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseudo-label-transfer-from-frame-level-to-note-level-in-a-teacher-student-framework-for-singing-transcription-from-polyphonic-music-2203.13422"/></url>
<url><loc>https://scifaro.com/en/abs/bddm-bilateral-denoising-diffusion-models-for-fast-and-high-quality-speech-synthesis-2203.13508</loc><lastmod>2022-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bddm-bilateral-denoising-diffusion-models-for-fast-and-high-quality-speech-synthesis-2203.13508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bddm-bilateral-denoising-diffusion-models-for-fast-and-high-quality-speech-synthesis-2203.13508"/></url>
<url><loc>https://scifaro.com/en/abs/embedding-recurrent-layers-with-dual-path-strategy-in-a-variant-of-convolutional-network-for-speaker-independent-speech-separation-2203.13574</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedding-recurrent-layers-with-dual-path-strategy-in-a-variant-of-convolutional-network-for-speaker-independent-speech-separation-2203.13574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedding-recurrent-layers-with-dual-path-strategy-in-a-variant-of-convolutional-network-for-speaker-independent-speech-separation-2203.13574"/></url>
<url><loc>https://scifaro.com/en/abs/emotionnas-two-stream-neural-architecture-search-for-speech-emotion-recognition-2203.13617</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotionnas-two-stream-neural-architecture-search-for-speech-emotion-recognition-2203.13617"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotionnas-two-stream-neural-architecture-search-for-speech-emotion-recognition-2203.13617"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-processing-front-end-for-distant-asr-exploiting-self-attention-channel-combinator-2203.13919</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-processing-front-end-for-distant-asr-exploiting-self-attention-channel-combinator-2203.13919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-processing-front-end-for-distant-asr-exploiting-self-attention-channel-combinator-2203.13919"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-vocoder-based-packet-loss-concealment-algorithm-2203.14010</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-vocoder-based-packet-loss-concealment-algorithm-2203.14010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-vocoder-based-packet-loss-concealment-algorithm-2203.14010"/></url>
<url><loc>https://scifaro.com/en/abs/remix-cycle-consistent-learning-on-adversarially-learned-separator-for-accurate-and-stable-unsupervised-speech-separation-2203.14080</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remix-cycle-consistent-learning-on-adversarially-learned-separator-for-accurate-and-stable-unsupervised-speech-separation-2203.14080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remix-cycle-consistent-learning-on-adversarially-learned-separator-for-accurate-and-stable-unsupervised-speech-separation-2203.14080"/></url>
<url><loc>https://scifaro.com/en/abs/speechsplit-2-0-unsupervised-speech-disentanglement-for-voice-conversion-without-tuning-autoencoder-bottlenecks-2203.14156</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechsplit-2-0-unsupervised-speech-disentanglement-for-voice-conversion-without-tuning-autoencoder-bottlenecks-2203.14156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechsplit-2-0-unsupervised-speech-disentanglement-for-voice-conversion-without-tuning-autoencoder-bottlenecks-2203.14156"/></url>
<url><loc>https://scifaro.com/en/abs/a-speech-representation-anonymization-framework-via-selective-noise-perturbation-2203.14171</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speech-representation-anonymization-framework-via-selective-noise-perturbation-2203.14171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speech-representation-anonymization-framework-via-selective-noise-perturbation-2203.14171"/></url>
<url><loc>https://scifaro.com/en/abs/listen-adapt-better-wer-source-free-single-utterance-test-time-adaptation-for-automatic-speech-recognition-2203.14222</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-adapt-better-wer-source-free-single-utterance-test-time-adaptation-for-automatic-speech-recognition-2203.14222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-adapt-better-wer-source-free-single-utterance-test-time-adaptation-for-automatic-speech-recognition-2203.14222"/></url>
<url><loc>https://scifaro.com/en/abs/bunched-lpcnet2-efficient-neural-vocoders-covering-devices-from-cloud-to-edge-2203.14416</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bunched-lpcnet2-efficient-neural-vocoders-covering-devices-from-cloud-to-edge-2203.14416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bunched-lpcnet2-efficient-neural-vocoders-covering-devices-from-cloud-to-edge-2203.14416"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-wideband-doa-estimation-method-by-frequency-focusing-and-error-weighting-2203.14494</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-wideband-doa-estimation-method-by-frequency-focusing-and-error-weighting-2203.14494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-wideband-doa-estimation-method-by-frequency-focusing-and-error-weighting-2203.14494"/></url>
<url><loc>https://scifaro.com/en/abs/curriculum-learning-for-self-supervised-speaker-verification-2203.14525</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/curriculum-learning-for-self-supervised-speaker-verification-2203.14525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/curriculum-learning-for-self-supervised-speaker-verification-2203.14525"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-active-learning-based-training-data-selection-for-speech-spoofing-countermeasure-2203.14553</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-active-learning-based-training-data-selection-for-speech-spoofing-countermeasure-2203.14553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-active-learning-based-training-data-selection-for-speech-spoofing-countermeasure-2203.14553"/></url>
<url><loc>https://scifaro.com/en/abs/an-effective-dereverberation-algorithm-by-fusing-mvdr-and-mclp-2203.14561</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-effective-dereverberation-algorithm-by-fusing-mvdr-and-mclp-2203.14561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-effective-dereverberation-algorithm-by-fusing-mvdr-and-mclp-2203.14561"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-fly-feature-based-rapid-speaker-adaptation-for-dysarthric-and-elderly-speech-recognition-2203.14593</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-fly-feature-based-rapid-speaker-adaptation-for-dysarthric-and-elderly-speech-recognition-2203.14593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-fly-feature-based-rapid-speaker-adaptation-for-dysarthric-and-elderly-speech-recognition-2203.14593"/></url>
<url><loc>https://scifaro.com/en/abs/syncnet-correlating-objective-for-time-delay-estimation-in-audio-signals-2203.14639</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syncnet-correlating-objective-for-time-delay-estimation-in-audio-signals-2203.14639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syncnet-correlating-objective-for-time-delay-estimation-in-audio-signals-2203.14639"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-voice-conversion-and-code-switching-synthesis-using-vq-vae-2203.14640</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-voice-conversion-and-code-switching-synthesis-using-vq-vae-2203.14640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-voice-conversion-and-code-switching-synthesis-using-vq-vae-2203.14640"/></url>
<url><loc>https://scifaro.com/en/abs/sasv-2022-the-first-spoofing-aware-speaker-verification-challenge-2203.14732</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sasv-2022-the-first-spoofing-aware-speaker-verification-challenge-2203.14732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sasv-2022-the-first-spoofing-aware-speaker-verification-challenge-2203.14732"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-style-learning-for-end-to-end-noise-robust-speech-recognition-2203.14838</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-style-learning-for-end-to-end-noise-robust-speech-recognition-2203.14838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-style-learning-for-end-to-end-noise-robust-speech-recognition-2203.14838"/></url>
<url><loc>https://scifaro.com/en/abs/towards-transferable-speech-emotion-representation-on-loss-functions-for-cross-lingual-latent-representations-2203.14865</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-transferable-speech-emotion-representation-on-loss-functions-for-cross-lingual-latent-representations-2203.14865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-transferable-speech-emotion-representation-on-loss-functions-for-cross-lingual-latent-representations-2203.14865"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-metric-learning-for-transferable-speech-emotion-recognition-and-embedding-across-low-resource-languages-2203.14867</loc><lastmod>2022-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-metric-learning-for-transferable-speech-emotion-recognition-and-embedding-across-low-resource-languages-2203.14867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-metric-learning-for-transferable-speech-emotion-recognition-and-embedding-across-low-resource-languages-2203.14867"/></url>
<url><loc>https://scifaro.com/en/abs/neural-vocoder-is-all-you-need-for-speech-super-resolution-2203.14941</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-vocoder-is-all-you-need-for-speech-super-resolution-2203.14941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-vocoder-is-all-you-need-for-speech-super-resolution-2203.14941"/></url>
<url><loc>https://scifaro.com/en/abs/word-discovery-in-visually-grounded-self-supervised-speech-models-2203.15081</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/word-discovery-in-visually-grounded-self-supervised-speech-models-2203.15081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/word-discovery-in-visually-grounded-self-supervised-speech-models-2203.15081"/></url>
<url><loc>https://scifaro.com/en/abs/improved-singing-voice-separation-with-chromagram-based-pitch-aware-remixing-2203.15092</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-singing-voice-separation-with-chromagram-based-pitch-aware-remixing-2203.15092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-singing-voice-separation-with-chromagram-based-pitch-aware-remixing-2203.15092"/></url>
<url><loc>https://scifaro.com/en/abs/separate-what-you-describe-language-queried-audio-source-separation-2203.15147</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-what-you-describe-language-queried-audio-source-separation-2203.15147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-what-you-describe-language-queried-audio-source-separation-2203.15147"/></url>
<url><loc>https://scifaro.com/en/abs/visualizations-of-complex-sequences-of-family-infant-vocalizations-using-bag-of-audio-words-approach-based-on-wav2vec-2-0-features-2203.15183</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualizations-of-complex-sequences-of-family-infant-vocalizations-using-bag-of-audio-words-approach-based-on-wav2vec-2-0-features-2203.15183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualizations-of-complex-sequences-of-family-infant-vocalizations-using-bag-of-audio-words-approach-based-on-wav2vec-2-0-features-2203.15183"/></url>
<url><loc>https://scifaro.com/en/abs/decomposed-temporal-dynamic-cnn-efficient-time-adaptive-network-for-text-independent-speaker-verification-explained-with-speaker-activation-map-2203.15277</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decomposed-temporal-dynamic-cnn-efficient-time-adaptive-network-for-text-independent-speaker-verification-explained-with-speaker-activation-map-2203.15277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decomposed-temporal-dynamic-cnn-efficient-time-adaptive-network-for-text-independent-speaker-verification-explained-with-speaker-activation-map-2203.15277"/></url>
<url><loc>https://scifaro.com/en/abs/mel-frequency-spectral-domain-defenses-against-adversarial-attacks-on-speech-recognition-systems-2203.15283</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-frequency-spectral-domain-defenses-against-adversarial-attacks-on-speech-recognition-systems-2203.15283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-frequency-spectral-domain-defenses-against-adversarial-attacks-on-speech-recognition-systems-2203.15283"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-dynamic-convolution-frequency-adaptive-pattern-recognition-for-sound-event-detection-2203.15296</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-dynamic-convolution-frequency-adaptive-pattern-recognition-for-sound-event-detection-2203.15296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-dynamic-convolution-frequency-adaptive-pattern-recognition-for-sound-event-detection-2203.15296"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-speech-sound-disorder-in-child-speech-using-posterior-based-speaker-representations-2203.15405</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-speech-sound-disorder-in-child-speech-using-posterior-based-speaker-representations-2203.15405"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-speech-sound-disorder-in-child-speech-using-posterior-based-speaker-representations-2203.15405"/></url>
<url><loc>https://scifaro.com/en/abs/training-speaker-embedding-extractors-using-multi-speaker-audio-with-unknown-speaker-boundaries-2203.15436</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-speaker-embedding-extractors-using-multi-speaker-audio-with-unknown-speaker-boundaries-2203.15436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-speaker-embedding-extractors-using-multi-speaker-audio-with-unknown-speaker-boundaries-2203.15436"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-framework-for-low-resource-text-to-speech-using-a-large-scale-unlabeled-speech-corpus-2203.15447</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-framework-for-low-resource-text-to-speech-using-a-large-scale-unlabeled-speech-corpus-2203.15447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-framework-for-low-resource-text-to-speech-using-a-large-scale-unlabeled-speech-corpus-2203.15447"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-directional-attention-model-for-multilingual-automatic-speech-recognition-2203.15473</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-directional-attention-model-for-multilingual-automatic-speech-recognition-2203.15473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-directional-attention-model-for-multilingual-automatic-speech-recognition-2203.15473"/></url>
<url><loc>https://scifaro.com/en/abs/on-metric-learning-for-audio-text-cross-modal-retrieval-2203.15537</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-metric-learning-for-audio-text-cross-modal-retrieval-2203.15537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-metric-learning-for-audio-text-cross-modal-retrieval-2203.15537"/></url>
<url><loc>https://scifaro.com/en/abs/lighthubert-lightweight-and-configurable-speech-representation-learning-with-once-for-all-hidden-unit-bert-2203.15610</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lighthubert-lightweight-and-configurable-speech-representation-learning-with-once-for-all-hidden-unit-bert-2203.15610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lighthubert-lightweight-and-configurable-speech-representation-learning-with-once-for-all-hidden-unit-bert-2203.15610"/></url>
<url><loc>https://scifaro.com/en/abs/cyclegan-based-unpaired-speech-dereverberation-2203.15652</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cyclegan-based-unpaired-speech-dereverberation-2203.15652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cyclegan-based-unpaired-speech-dereverberation-2203.15652"/></url>
<url><loc>https://scifaro.com/en/abs/a-passive-similarity-based-cnn-filter-pruning-for-efficient-acoustic-scene-classification-2203.15751</loc><lastmod>2022-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-passive-similarity-based-cnn-filter-pruning-for-efficient-acoustic-scene-classification-2203.15751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-passive-similarity-based-cnn-filter-pruning-for-efficient-acoustic-scene-classification-2203.15751"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-text-to-speech-synthesis-by-unsupervised-automatic-speech-recognition-2203.15796</loc><lastmod>2022-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-text-to-speech-synthesis-by-unsupervised-automatic-speech-recognition-2203.15796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-text-to-speech-synthesis-by-unsupervised-automatic-speech-recognition-2203.15796"/></url>
<url><loc>https://scifaro.com/en/abs/wavprompt-towards-few-shot-spoken-language-understanding-with-frozen-language-models-2203.15863</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavprompt-towards-few-shot-spoken-language-understanding-with-frozen-language-models-2203.15863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavprompt-towards-few-shot-spoken-language-understanding-with-frozen-language-models-2203.15863"/></url>
<url><loc>https://scifaro.com/en/abs/improving-mispronunciation-detection-with-wav2vec2-based-momentum-pseudo-labeling-for-accentedness-and-intelligibility-assessment-2203.15937</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-mispronunciation-detection-with-wav2vec2-based-momentum-pseudo-labeling-for-accentedness-and-intelligibility-assessment-2203.15937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-mispronunciation-detection-with-wav2vec2-based-momentum-pseudo-labeling-for-accentedness-and-intelligibility-assessment-2203.15937"/></url>
<url><loc>https://scifaro.com/en/abs/4-bit-conformer-with-native-quantization-aware-training-for-speech-recognition-2203.15952</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/4-bit-conformer-with-native-quantization-aware-training-for-speech-recognition-2203.15952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/4-bit-conformer-with-native-quantization-aware-training-for-speech-recognition-2203.15952"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-dynamic-scale-weighting-2203.15974</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-dynamic-scale-weighting-2203.15974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-speaker-diarization-with-dynamic-scale-weighting-2203.15974"/></url>
<url><loc>https://scifaro.com/en/abs/device-directed-speech-detection-regularization-via-distillation-for-weakly-supervised-models-2203.15975</loc><lastmod>2022-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/device-directed-speech-detection-regularization-via-distillation-for-weakly-supervised-models-2203.15975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/device-directed-speech-detection-regularization-via-distillation-for-weakly-supervised-models-2203.15975"/></url>
<url><loc>https://scifaro.com/en/abs/asymmetric-proxy-loss-for-multi-view-acoustic-word-embeddings-2203.16080</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asymmetric-proxy-loss-for-multi-view-acoustic-word-embeddings-2203.16080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asymmetric-proxy-loss-for-multi-view-acoustic-word-embeddings-2203.16080"/></url>
<url><loc>https://scifaro.com/en/abs/using-adapters-to-overcome-catastrophic-forgetting-in-end-to-end-automatic-speech-recognition-2203.16082</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-adapters-to-overcome-catastrophic-forgetting-in-end-to-end-automatic-speech-recognition-2203.16082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-adapters-to-overcome-catastrophic-forgetting-in-end-to-end-automatic-speech-recognition-2203.16082"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-generation-conditioned-on-continuous-valued-emotions-2203.16165</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-generation-conditioned-on-continuous-valued-emotions-2203.16165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-generation-conditioned-on-continuous-valued-emotions-2203.16165"/></url>
<url><loc>https://scifaro.com/en/abs/probing-phoneme-language-and-speaker-information-in-unsupervised-speech-representations-2203.16193</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-phoneme-language-and-speaker-information-in-unsupervised-speech-representations-2203.16193"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-phoneme-language-and-speaker-information-in-unsupervised-speech-representations-2203.16193"/></url>
<url><loc>https://scifaro.com/en/abs/phase-aware-deep-speech-enhancement-it-s-all-about-the-frame-length-2203.16222</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phase-aware-deep-speech-enhancement-it-s-all-about-the-frame-length-2203.16222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phase-aware-deep-speech-enhancement-it-s-all-about-the-frame-length-2203.16222"/></url>
<url><loc>https://scifaro.com/en/abs/joint-domain-adaptation-and-speech-bandwidth-extension-using-time-domain-gans-for-speaker-verification-2203.16614</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-domain-adaptation-and-speech-bandwidth-extension-using-time-domain-gans-for-speaker-verification-2203.16614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-domain-adaptation-and-speech-bandwidth-extension-using-time-domain-gans-for-speaker-verification-2203.16614"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-speaker-attributed-asr-with-token-level-speaker-embeddings-2203.16685</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-speaker-attributed-asr-with-token-level-speaker-embeddings-2203.16685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-speaker-attributed-asr-with-token-level-speaker-embeddings-2203.16685"/></url>
<url><loc>https://scifaro.com/en/abs/mae-ast-masked-autoencoding-audio-spectrogram-transformer-2203.16691</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mae-ast-masked-autoencoding-audio-spectrogram-transformer-2203.16691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mae-ast-masked-autoencoding-audio-spectrogram-transformer-2203.16691"/></url>
<url><loc>https://scifaro.com/en/abs/robust-disentangled-variational-speech-representation-learning-for-zero-shot-voice-conversion-2203.16705</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-disentangled-variational-speech-representation-learning-for-zero-shot-voice-conversion-2203.16705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-disentangled-variational-speech-representation-learning-for-zero-shot-voice-conversion-2203.16705"/></url>
<url><loc>https://scifaro.com/en/abs/specgrad-diffusion-probabilistic-model-based-neural-vocoder-with-adaptive-noise-spectral-shaping-2203.16749</loc><lastmod>2022-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/specgrad-diffusion-probabilistic-model-based-neural-vocoder-with-adaptive-noise-spectral-shaping-2203.16749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/specgrad-diffusion-probabilistic-model-based-neural-vocoder-with-adaptive-noise-spectral-shaping-2203.16749"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-a-comparative-study-2203.16757</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-a-comparative-study-2203.16757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-single-channel-speech-for-multi-channel-end-to-end-speech-recognition-a-comparative-study-2203.16757"/></url>
<url><loc>https://scifaro.com/en/abs/cuside-chunking-simulating-future-context-and-decoding-for-streaming-asr-2203.16758</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuside-chunking-simulating-future-context-and-decoding-for-streaming-asr-2203.16758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuside-chunking-simulating-future-context-and-decoding-for-streaming-asr-2203.16758"/></url>
<url><loc>https://scifaro.com/en/abs/speechprompt-an-exploration-of-prompt-tuning-on-generative-spoken-language-model-for-speech-processing-tasks-2203.16773</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechprompt-an-exploration-of-prompt-tuning-on-generative-spoken-language-model-for-speech-processing-tasks-2203.16773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechprompt-an-exploration-of-prompt-tuning-on-generative-spoken-language-model-for-speech-processing-tasks-2203.16773"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-of-language-model-integration-for-transducer-based-speech-recognition-2203.16776</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-of-language-model-integration-for-transducer-based-speech-recognition-2203.16776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-of-language-model-integration-for-transducer-based-speech-recognition-2203.16776"/></url>
<url><loc>https://scifaro.com/en/abs/how-does-pre-trained-wav2vec-2-0-perform-on-domain-shifted-asr-an-extensive-benchmark-on-air-traffic-control-communications-2203.16822</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-does-pre-trained-wav2vec-2-0-perform-on-domain-shifted-asr-an-extensive-benchmark-on-air-traffic-control-communications-2203.16822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-does-pre-trained-wav2vec-2-0-perform-on-domain-shifted-asr-an-extensive-benchmark-on-air-traffic-control-communications-2203.16822"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-extraction-with-co-speech-gestures-cue-2203.16840</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-extraction-with-co-speech-gestures-cue-2203.16840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-extraction-with-co-speech-gestures-cue-2203.16840"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-continuity-loss-to-reduce-over-suppression-for-time-domain-target-speaker-extraction-2203.16843</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-continuity-loss-to-reduce-over-suppression-for-time-domain-target-speaker-extraction-2203.16843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-continuity-loss-to-reduce-over-suppression-for-time-domain-target-speaker-extraction-2203.16843"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-attention-network-for-evaluating-therapist-empathy-in-counseling-session-2203.16847</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-attention-network-for-evaluating-therapist-empathy-in-counseling-session-2203.16847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-attention-network-for-evaluating-therapist-empathy-in-counseling-session-2203.16847"/></url>
<url><loc>https://scifaro.com/en/abs/jets-jointly-training-fastspeech2-and-hifi-gan-for-end-to-end-text-to-speech-2203.16852</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jets-jointly-training-fastspeech2-and-hifi-gan-for-end-to-end-text-to-speech-2203.16852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jets-jointly-training-fastspeech2-and-hifi-gan-for-end-to-end-text-to-speech-2203.16852"/></url>
<url><loc>https://scifaro.com/en/abs/memory-efficient-training-of-rnn-transducer-with-sampled-softmax-2203.16868</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-efficient-training-of-rnn-transducer-with-sampled-softmax-2203.16868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-efficient-training-of-rnn-transducer-with-sampled-softmax-2203.16868"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-estimation-of-sound-sources-using-icosahedral-cnns-2203.16940</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-sound-sources-using-icosahedral-cnns-2203.16940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-of-sound-sources-using-icosahedral-cnns-2203.16940"/></url>
<url><loc>https://scifaro.com/en/abs/improving-language-identification-of-accented-speech-2203.16972</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-language-identification-of-accented-speech-2203.16972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-language-identification-of-accented-speech-2203.16972"/></url>
<url><loc>https://scifaro.com/en/abs/singaug-data-augmentation-for-singing-voice-synthesis-with-cycle-consistent-training-strategy-2203.17001</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singaug-data-augmentation-for-singing-voice-synthesis-with-cycle-consistent-training-strategy-2203.17001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singaug-data-augmentation-for-singing-voice-synthesis-with-cycle-consistent-training-strategy-2203.17001"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-score-based-generative-models-in-the-complex-stft-domain-2203.17004</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-score-based-generative-models-in-the-complex-stft-domain-2203.17004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-score-based-generative-models-in-the-complex-stft-domain-2203.17004"/></url>
<url><loc>https://scifaro.com/en/abs/deepfry-identifying-vocal-fry-using-deep-neural-networks-2203.17019</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfry-identifying-vocal-fry-using-deep-neural-networks-2203.17019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfry-identifying-vocal-fry-using-deep-neural-networks-2203.17019"/></url>
<url><loc>https://scifaro.com/en/abs/partial-coupling-of-optimal-transport-for-spoken-language-identification-2203.17036</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partial-coupling-of-optimal-transport-for-spoken-language-identification-2203.17036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partial-coupling-of-optimal-transport-for-spoken-language-identification-2203.17036"/></url>
<url><loc>https://scifaro.com/en/abs/eend-ss-joint-end-to-end-neural-speaker-diarization-and-speech-separation-for-flexible-number-of-speakers-2203.17068</loc><lastmod>2022-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eend-ss-joint-end-to-end-neural-speaker-diarization-and-speech-separation-for-flexible-number-of-speakers-2203.17068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eend-ss-joint-end-to-end-neural-speaker-diarization-and-speech-separation-for-flexible-number-of-speakers-2203.17068"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-non-autoregressive-gan-voice-conversion-using-vqwav2vec-features-and-dynamic-convolution-2203.17172</loc><lastmod>2022-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-non-autoregressive-gan-voice-conversion-using-vqwav2vec-features-and-dynamic-convolution-2203.17172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-non-autoregressive-gan-voice-conversion-using-vqwav2vec-features-and-dynamic-convolution-2203.17172"/></url>
<url><loc>https://scifaro.com/en/abs/mixed-phoneme-bert-improving-bert-with-mixed-phoneme-and-sup-phoneme-representations-for-text-to-speech-2203.17190</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixed-phoneme-bert-improving-bert-with-mixed-phoneme-and-sup-phoneme-representations-for-text-to-speech-2203.17190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixed-phoneme-bert-improving-bert-with-mixed-phoneme-and-sup-phoneme-representations-for-text-to-speech-2203.17190"/></url>
<url><loc>https://scifaro.com/en/abs/improved-relation-networks-for-end-to-end-speaker-verification-and-identification-2203.17218</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-relation-networks-for-end-to-end-speaker-verification-and-identification-2203.17218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-relation-networks-for-end-to-end-speaker-verification-and-identification-2203.17218"/></url>
<url><loc>https://scifaro.com/en/abs/importance-of-different-temporal-modulations-of-speech-a-tale-of-two-perspectives-2204.00065</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/importance-of-different-temporal-modulations-of-speech-a-tale-of-two-perspectives-2204.00065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/importance-of-different-temporal-modulations-of-speech-a-tale-of-two-perspectives-2204.00065"/></url>
<url><loc>https://scifaro.com/en/abs/universal-adaptor-converting-mel-spectrograms-between-different-configurations-for-speech-synthesis-2204.00170</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-adaptor-converting-mel-spectrograms-between-different-configurations-for-speech-synthesis-2204.00170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-adaptor-converting-mel-spectrograms-between-different-configurations-for-speech-synthesis-2204.00170"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-loss-for-unsupervised-multi-channel-source-separation-2204.00210</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-loss-for-unsupervised-multi-channel-source-separation-2204.00210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-loss-for-unsupervised-multi-channel-source-separation-2204.00210"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-speaker-asr-with-independent-vector-analysis-2204.00218</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-asr-with-independent-vector-analysis-2204.00218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-speaker-asr-with-independent-vector-analysis-2204.00218"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-confidence-gates-for-joint-training-of-se-and-asr-2204.00226</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-confidence-gates-for-joint-training-of-se-and-asr-2204.00226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-confidence-gates-for-joint-training-of-se-and-asr-2204.00226"/></url>
<url><loc>https://scifaro.com/en/abs/adaspeech-4-adaptive-text-to-speech-in-zero-shot-scenarios-2204.00436</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaspeech-4-adaptive-text-to-speech-in-zero-shot-scenarios-2204.00436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaspeech-4-adaptive-text-to-speech-in-zero-shot-scenarios-2204.00436"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-convolutive-matrix-factorization-for-articulatory-representation-decomposition-2204.00465</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-convolutive-matrix-factorization-for-articulatory-representation-decomposition-2204.00465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-convolutive-matrix-factorization-for-articulatory-representation-decomposition-2204.00465"/></url>
<url><loc>https://scifaro.com/en/abs/1-d-cnn-based-acoustic-scene-classification-via-reducing-layer-wise-dimensionality-2204.00555</loc><lastmod>2022-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1-d-cnn-based-acoustic-scene-classification-via-reducing-layer-wise-dimensionality-2204.00555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1-d-cnn-based-acoustic-scene-classification-via-reducing-layer-wise-dimensionality-2204.00555"/></url>
<url><loc>https://scifaro.com/en/abs/asr-data-augmentation-in-low-resource-settings-using-cross-lingual-multi-speaker-tts-and-cross-lingual-voice-conversion-2204.00618</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-data-augmentation-in-low-resource-settings-using-cross-lingual-multi-speaker-tts-and-cross-lingual-voice-conversion-2204.00618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-data-augmentation-in-low-resource-settings-using-cross-lingual-multi-speaker-tts-and-cross-lingual-voice-conversion-2204.00618"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-clustering-with-role-induced-constraints-for-speaker-diarization-2204.00657</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-clustering-with-role-induced-constraints-for-speaker-diarization-2204.00657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-clustering-with-role-induced-constraints-for-speaker-diarization-2204.00657"/></url>
<url><loc>https://scifaro.com/en/abs/vqtts-high-fidelity-text-to-speech-synthesis-with-self-supervised-vq-acoustic-feature-2204.00768</loc><lastmod>2024-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vqtts-high-fidelity-text-to-speech-synthesis-with-self-supervised-vq-acoustic-feature-2204.00768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vqtts-high-fidelity-text-to-speech-synthesis-with-self-supervised-vq-acoustic-feature-2204.00768"/></url>
<url><loc>https://scifaro.com/en/abs/fast-real-time-personalized-speech-enhancement-end-to-end-enhancement-network-e3net-and-knowledge-distillation-2204.00771</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-real-time-personalized-speech-enhancement-end-to-end-enhancement-network-e3net-and-knowledge-distillation-2204.00771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-real-time-personalized-speech-enhancement-end-to-end-enhancement-network-e3net-and-knowledge-distillation-2204.00771"/></url>
<url><loc>https://scifaro.com/en/abs/from-simulated-mixtures-to-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2204.00890</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-simulated-mixtures-to-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2204.00890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-simulated-mixtures-to-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2204.00890"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-dialect-density-estimation-for-african-american-english-2204.00967</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-dialect-density-estimation-for-african-american-english-2204.00967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-dialect-density-estimation-for-african-american-english-2204.00967"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-and-multi-scale-selective-kernel-attention-for-speaker-verification-2204.01005</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-and-multi-scale-selective-kernel-attention-for-speaker-verification-2204.01005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-and-multi-scale-selective-kernel-attention-for-speaker-verification-2204.01005"/></url>
<url><loc>https://scifaro.com/en/abs/into-tts-intonation-template-based-prosody-control-system-2204.01271</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/into-tts-intonation-template-based-prosody-control-system-2204.01271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/into-tts-intonation-template-based-prosody-control-system-2204.01271"/></url>
<url><loc>https://scifaro.com/en/abs/tplcnet-real-time-deep-packet-loss-concealment-in-the-time-domain-using-a-short-temporal-context-2204.01300</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tplcnet-real-time-deep-packet-loss-concealment-in-the-time-domain-using-a-short-temporal-context-2204.01300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tplcnet-real-time-deep-packet-loss-concealment-in-the-time-domain-using-a-short-temporal-context-2204.01300"/></url>
<url><loc>https://scifaro.com/en/abs/mosra-joint-mean-opinion-score-and-room-acoustics-speech-quality-assessment-2204.01345</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mosra-joint-mean-opinion-score-and-room-acoustics-speech-quality-assessment-2204.01345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mosra-joint-mean-opinion-score-and-room-acoustics-speech-quality-assessment-2204.01345"/></url>
<url><loc>https://scifaro.com/en/abs/target-confusion-in-end-to-end-speaker-extraction-analysis-and-approaches-2204.01355</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-confusion-in-end-to-end-speaker-extraction-analysis-and-approaches-2204.01355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-confusion-in-end-to-end-speaker-extraction-analysis-and-approaches-2204.01355"/></url>
<url><loc>https://scifaro.com/en/abs/anti-spoofing-using-transfer-learning-with-variational-information-bottleneck-2204.01387</loc><lastmod>2022-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anti-spoofing-using-transfer-learning-with-variational-information-bottleneck-2204.01387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anti-spoofing-using-transfer-learning-with-variational-information-bottleneck-2204.01387"/></url>
<url><loc>https://scifaro.com/en/abs/the-vicomtech-spoofing-aware-biometric-system-for-the-sasv-challenge-2204.01399</loc><lastmod>2022-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-vicomtech-spoofing-aware-biometric-system-for-the-sasv-challenge-2204.01399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-vicomtech-spoofing-aware-biometric-system-for-the-sasv-challenge-2204.01399"/></url>
<url><loc>https://scifaro.com/en/abs/robust-stuttering-detection-via-multi-task-and-adversarial-learning-2204.01735</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-stuttering-detection-via-multi-task-and-adversarial-learning-2204.01735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-stuttering-detection-via-multi-task-and-adversarial-learning-2204.01735"/></url>
<url><loc>https://scifaro.com/en/abs/dual-quaternion-ambisonics-array-for-six-degree-of-freedom-acoustic-representation-2204.01851</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-quaternion-ambisonics-array-for-six-degree-of-freedom-acoustic-representation-2204.01851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-quaternion-ambisonics-array-for-six-degree-of-freedom-acoustic-representation-2204.01851"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-data-selection-via-discrete-speech-representation-for-asr-2204.01981</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-data-selection-via-discrete-speech-representation-for-asr-2204.01981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-data-selection-via-discrete-speech-representation-for-asr-2204.01981"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-influence-of-fine-tuning-data-on-wav2vec-2-0-model-for-blind-speech-quality-prediction-2204.02135</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-influence-of-fine-tuning-data-on-wav2vec-2-0-model-for-blind-speech-quality-prediction-2204.02135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-influence-of-fine-tuning-data-on-wav2vec-2-0-model-for-blind-speech-quality-prediction-2204.02135"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-speech-representation-learning-based-on-factorized-hierarchical-variational-autoencoder-with-self-supervised-objective-2204.02166</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-speech-representation-learning-based-on-factorized-hierarchical-variational-autoencoder-with-self-supervised-objective-2204.02166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-speech-representation-learning-based-on-factorized-hierarchical-variational-autoencoder-with-self-supervised-objective-2204.02166"/></url>
<url><loc>https://scifaro.com/en/abs/complex-recurrent-variational-autoencoder-with-application-to-speech-enhancement-2204.02195</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-recurrent-variational-autoencoder-with-application-to-speech-enhancement-2204.02195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-recurrent-variational-autoencoder-with-application-to-speech-enhancement-2204.02195"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-deep-learning-mos-predictors-for-speech-synthesis-quality-2204.02249</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-deep-learning-mos-predictors-for-speech-synthesis-quality-2204.02249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-deep-learning-mos-predictors-for-speech-synthesis-quality-2204.02249"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-and-multimodal-abuse-detection-2204.02263</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-and-multimodal-abuse-detection-2204.02263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-and-multimodal-abuse-detection-2204.02263"/></url>
<url><loc>https://scifaro.com/en/abs/design-guidelines-for-inclusive-speaker-verification-evaluation-datasets-2204.02281</loc><lastmod>2022-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-guidelines-for-inclusive-speaker-verification-evaluation-datasets-2204.02281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-guidelines-for-inclusive-speaker-verification-evaluation-datasets-2204.02281"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-speech-separation-guided-diarization-for-telephone-conversations-2204.02306</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-speech-separation-guided-diarization-for-telephone-conversations-2204.02306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-speech-separation-guided-diarization-for-telephone-conversations-2204.02306"/></url>
<url><loc>https://scifaro.com/en/abs/hear-no-evil-towards-adversarial-robustness-of-automatic-speech-recognition-via-multi-task-learning-2204.02381</loc><lastmod>2022-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hear-no-evil-towards-adversarial-robustness-of-automatic-speech-recognition-via-multi-task-learning-2204.02381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hear-no-evil-towards-adversarial-robustness-of-automatic-speech-recognition-via-multi-task-learning-2204.02381"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speech-emotion-representations-in-the-quaternion-domain-2204.02385</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speech-emotion-representations-in-the-quaternion-domain-2204.02385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speech-emotion-representations-in-the-quaternion-domain-2204.02385"/></url>
<url><loc>https://scifaro.com/en/abs/global-hrtf-interpolation-via-learned-affine-transformation-of-hyper-conditioned-features-2204.02637</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-hrtf-interpolation-via-learned-affine-transformation-of-hyper-conditioned-features-2204.02637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-hrtf-interpolation-via-learned-affine-transformation-of-hyper-conditioned-features-2204.02637"/></url>
<url><loc>https://scifaro.com/en/abs/representation-selective-self-distillation-and-wav2vec-2-0-feature-exploration-for-spoof-aware-speaker-verification-2204.02639</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-selective-self-distillation-and-wav2vec-2-0-feature-exploration-for-spoof-aware-speaker-verification-2204.02639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-selective-self-distillation-and-wav2vec-2-0-feature-exploration-for-spoof-aware-speaker-verification-2204.02639"/></url>
<url><loc>https://scifaro.com/en/abs/customizable-end-to-end-optimization-of-online-neural-network-supported-dereverberation-for-hearing-devices-2204.02694</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/customizable-end-to-end-optimization-of-online-neural-network-supported-dereverberation-for-hearing-devices-2204.02694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/customizable-end-to-end-optimization-of-online-neural-network-supported-dereverberation-for-hearing-devices-2204.02694"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-augmented-kalman-filtering-for-robust-online-speech-dereverberation-in-noisy-reverberant-environments-2204.02741</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-augmented-kalman-filtering-for-robust-online-speech-dereverberation-in-noisy-reverberant-environments-2204.02741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-augmented-kalman-filtering-for-robust-online-speech-dereverberation-in-noisy-reverberant-environments-2204.02741"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-denoising-for-microphone-classification-2204.02841</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-denoising-for-microphone-classification-2204.02841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-denoising-for-microphone-classification-2204.02841"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-network-supported-two-stage-algorithm-for-lightweight-dereverberation-on-hearing-devices-2204.02978</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-network-supported-two-stage-algorithm-for-lightweight-dereverberation-on-hearing-devices-2204.02978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-network-supported-two-stage-algorithm-for-lightweight-dereverberation-on-hearing-devices-2204.02978"/></url>
<url><loc>https://scifaro.com/en/abs/musical-information-extraction-from-the-singing-voice-2204.03166</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-information-extraction-from-the-singing-voice-2204.03166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-information-extraction-from-the-singing-voice-2204.03166"/></url>
<url><loc>https://scifaro.com/en/abs/ddos-a-mos-prediction-framework-utilizing-domain-adaptive-pre-training-and-distribution-of-opinion-scores-2204.03219</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddos-a-mos-prediction-framework-utilizing-domain-adaptive-pre-training-and-distribution-of-opinion-scores-2204.03219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddos-a-mos-prediction-framework-utilizing-domain-adaptive-pre-training-and-distribution-of-opinion-scores-2204.03219"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-real-conversational-data-for-multi-channel-continuous-speech-separation-2204.03232</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-real-conversational-data-for-multi-channel-continuous-speech-separation-2204.03232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-real-conversational-data-for-multi-channel-continuous-speech-separation-2204.03232"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-quantized-prosody-representation-for-controllable-speech-synthesis-2204.03238</loc><lastmod>2022-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-quantized-prosody-representation-for-controllable-speech-synthesis-2204.03238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-quantized-prosody-representation-for-controllable-speech-synthesis-2204.03238"/></url>
<url><loc>https://scifaro.com/en/abs/mbi-net-a-non-intrusive-multi-branched-speech-intelligibility-prediction-model-for-hearing-aids-2204.03305</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mbi-net-a-non-intrusive-multi-branched-speech-intelligibility-prediction-model-for-hearing-aids-2204.03305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mbi-net-a-non-intrusive-multi-branched-speech-intelligibility-prediction-model-for-hearing-aids-2204.03305"/></url>
<url><loc>https://scifaro.com/en/abs/music-robust-automatic-lyrics-transcription-of-polyphonic-music-2204.03306</loc><lastmod>2022-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-robust-automatic-lyrics-transcription-of-polyphonic-music-2204.03306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-robust-automatic-lyrics-transcription-of-polyphonic-music-2204.03306"/></url>
<url><loc>https://scifaro.com/en/abs/mti-net-a-multi-target-speech-intelligibility-prediction-model-2204.03310</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mti-net-a-multi-target-speech-intelligibility-prediction-model-2204.03310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mti-net-a-multi-target-speech-intelligibility-prediction-model-2204.03310"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-self-supervised-embeddings-for-speech-enhancement-2204.03339</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-self-supervised-embeddings-for-speech-enhancement-2204.03339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-self-supervised-embeddings-for-speech-enhancement-2204.03339"/></url>
<url><loc>https://scifaro.com/en/abs/correcting-mispronunciations-in-speech-using-spectrogram-inpainting-2204.03379</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correcting-mispronunciations-in-speech-using-spectrogram-inpainting-2204.03379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correcting-mispronunciations-in-speech-using-spectrogram-inpainting-2204.03379"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-dysfluencies-in-stuttering-therapy-using-wav2vec-2-0-2204.03417</loc><lastmod>2026-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-dysfluencies-in-stuttering-therapy-using-wav2vec-2-0-2204.03417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-dysfluencies-in-stuttering-therapy-using-wav2vec-2-0-2204.03417"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-vocal-fatigue-with-neural-embeddings-2204.03428</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-vocal-fatigue-with-neural-embeddings-2204.03428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-vocal-fatigue-with-neural-embeddings-2204.03428"/></url>
<url><loc>https://scifaro.com/en/abs/personal-vad-2-0-optimizing-personal-voice-activity-detection-for-on-device-speech-recognition-2204.03793</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personal-vad-2-0-optimizing-personal-voice-activity-detection-for-on-device-speech-recognition-2204.03793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personal-vad-2-0-optimizing-personal-voice-activity-detection-for-on-device-speech-recognition-2204.03793"/></url>
<url><loc>https://scifaro.com/en/abs/advest-adversarial-perturbation-estimation-to-classify-and-detect-adversarial-attacks-against-speaker-identification-2204.03848</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advest-adversarial-perturbation-estimation-to-classify-and-detect-adversarial-attacks-against-speaker-identification-2204.03848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advest-adversarial-perturbation-estimation-to-classify-and-detect-adversarial-attacks-against-speaker-identification-2204.03848"/></url>
<url><loc>https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-hybrid-speech-recognition-using-joint-adversarial-fine-tuning-with-denoiser-2204.03851</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-hybrid-speech-recognition-using-joint-adversarial-fine-tuning-with-denoiser-2204.03851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defense-against-adversarial-attacks-on-hybrid-speech-recognition-using-joint-adversarial-fine-tuning-with-denoiser-2204.03851"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-softmax-for-end-to-end-low-resource-multilingual-speech-recognition-2204.03855</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-softmax-for-end-to-end-low-resource-multilingual-speech-recognition-2204.03855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-softmax-for-end-to-end-low-resource-multilingual-speech-recognition-2204.03855"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-pronunciation-assessment-using-self-supervised-speech-representation-learning-2204.03863</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-pronunciation-assessment-using-self-supervised-speech-representation-learning-2204.03863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-pronunciation-assessment-using-self-supervised-speech-representation-learning-2204.03863"/></url>
<url><loc>https://scifaro.com/en/abs/soundbeam-target-sound-extraction-conditioned-on-sound-class-labels-and-enrollment-clues-for-increased-performance-and-continuous-learning-2204.03895</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundbeam-target-sound-extraction-conditioned-on-sound-class-labels-and-enrollment-clues-for-increased-performance-and-continuous-learning-2204.03895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundbeam-target-sound-extraction-conditioned-on-sound-class-labels-and-enrollment-clues-for-increased-performance-and-continuous-learning-2204.03895"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-transformer-s-potential-on-automatic-piano-transcription-2204.03898</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-transformer-s-potential-on-automatic-piano-transcription-2204.03898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-transformer-s-potential-on-automatic-piano-transcription-2204.03898"/></url>
<url><loc>https://scifaro.com/en/abs/scoring-of-large-margin-embeddings-for-speaker-verification-cosine-or-plda-2204.03965</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scoring-of-large-margin-embeddings-for-speaker-verification-cosine-or-plda-2204.03965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scoring-of-large-margin-embeddings-for-speaker-verification-cosine-or-plda-2204.03965"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-and-multi-scale-variational-autoencoder-for-diverse-and-natural-non-autoregressive-text-to-speech-2204.04004</loc><lastmod>2022-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-and-multi-scale-variational-autoencoder-for-diverse-and-natural-non-autoregressive-text-to-speech-2204.04004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-and-multi-scale-variational-autoencoder-for-diverse-and-natural-non-autoregressive-text-to-speech-2204.04004"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-transformations-of-voice-level-in-singing-voice-2204.04006</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-transformations-of-voice-level-in-singing-voice-2204.04006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-transformations-of-voice-level-in-singing-voice-2204.04006"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-latent-speech-representation-for-automatic-pathological-intelligibility-assessment-2204.04016</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-latent-speech-representation-for-automatic-pathological-intelligibility-assessment-2204.04016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-latent-speech-representation-for-automatic-pathological-intelligibility-assessment-2204.04016"/></url>
<url><loc>https://scifaro.com/en/abs/declipping-of-speech-signals-using-frequency-selective-extrapolation-2204.04068</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/declipping-of-speech-signals-using-frequency-selective-extrapolation-2204.04068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/declipping-of-speech-signals-using-frequency-selective-extrapolation-2204.04068"/></url>
<url><loc>https://scifaro.com/en/abs/karaoker-alignment-free-singing-voice-synthesis-with-speech-training-data-2204.04127</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/karaoker-alignment-free-singing-voice-synthesis-with-speech-training-data-2204.04127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/karaoker-alignment-free-singing-voice-synthesis-with-speech-training-data-2204.04127"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-data-augmentation-selection-and-parametrization-in-contrastive-self-supervised-speech-representation-learning-2204.04170</loc><lastmod>2022-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-data-augmentation-selection-and-parametrization-in-contrastive-self-supervised-speech-representation-learning-2204.04170"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-data-augmentation-selection-and-parametrization-in-contrastive-self-supervised-speech-representation-learning-2204.04170"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-based-data-augmentation-for-end-to-end-automatic-speech-recognition-2204.04284</loc><lastmod>2022-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-based-data-augmentation-for-end-to-end-automatic-speech-recognition-2204.04284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-based-data-augmentation-for-end-to-end-automatic-speech-recognition-2204.04284"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-hidden-representations-from-a-dnn-based-speech-recogniser-for-speech-intelligibility-prediction-in-hearing-impaired-listeners-2204.04287</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-hidden-representations-from-a-dnn-based-speech-recogniser-for-speech-intelligibility-prediction-in-hearing-impaired-listeners-2204.04287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-hidden-representations-from-a-dnn-based-speech-recogniser-for-speech-intelligibility-prediction-in-hearing-impaired-listeners-2204.04287"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-uncertainty-measures-of-automatic-speech-recognition-for-non-intrusive-speech-intelligibility-prediction-2204.04288</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-uncertainty-measures-of-automatic-speech-recognition-for-non-intrusive-speech-intelligibility-prediction-2204.04288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-uncertainty-measures-of-automatic-speech-recognition-for-non-intrusive-speech-intelligibility-prediction-2204.04288"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-using-cepstrogram-for-countermeasure-against-replay-attacks-2204.04333</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-using-cepstrogram-for-countermeasure-against-replay-attacks-2204.04333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-using-cepstrogram-for-countermeasure-against-replay-attacks-2204.04333"/></url>
<url><loc>https://scifaro.com/en/abs/quiko-a-quantum-beat-generation-application-2204.04370</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quiko-a-quantum-beat-generation-application-2204.04370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quiko-a-quantum-beat-generation-application-2204.04370"/></url>
<url><loc>https://scifaro.com/en/abs/listen-only-to-me-how-well-can-target-speech-extraction-handle-false-alarms-2204.04811</loc><lastmod>2022-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-only-to-me-how-well-can-target-speech-extraction-handle-false-alarms-2204.04811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-only-to-me-how-well-can-target-speech-extraction-handle-false-alarms-2204.04811"/></url>
<url><loc>https://scifaro.com/en/abs/the-partialspoof-database-and-countermeasures-for-the-detection-of-short-fake-speech-segments-embedded-in-an-utterance-2204.05177</loc><lastmod>2023-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-partialspoof-database-and-countermeasures-for-the-detection-of-short-fake-speech-segments-embedded-in-an-utterance-2204.05177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-partialspoof-database-and-countermeasures-for-the-detection-of-short-fake-speech-segments-embedded-in-an-utterance-2204.05177"/></url>
<url><loc>https://scifaro.com/en/abs/a-wav2vec2-based-experimental-study-on-self-supervised-learning-methods-to-improve-child-speech-recognition-2204.05419</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-wav2vec2-based-experimental-study-on-self-supervised-learning-methods-to-improve-child-speech-recognition-2204.05419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-wav2vec2-based-experimental-study-on-self-supervised-learning-methods-to-improve-child-speech-recognition-2204.05419"/></url>
<url><loc>https://scifaro.com/en/abs/correctspeech-a-fully-automated-system-for-speech-correction-and-accent-reduction-2204.05460</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/correctspeech-a-fully-automated-system-for-speech-correction-and-accent-reduction-2204.05460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/correctspeech-a-fully-automated-system-for-speech-correction-and-accent-reduction-2204.05460"/></url>
<url><loc>https://scifaro.com/en/abs/low-latency-time-domain-multichannel-speech-and-music-source-separation-2204.05609</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-latency-time-domain-multichannel-speech-and-music-source-separation-2204.05609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-latency-time-domain-multichannel-speech-and-music-source-separation-2204.05609"/></url>
<url><loc>https://scifaro.com/en/abs/text-driven-separation-of-arbitrary-sounds-2204.05738</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-driven-separation-of-arbitrary-sounds-2204.05738"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-driven-separation-of-arbitrary-sounds-2204.05738"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-pitch-controllability-using-timbre-preserving-pitch-augmentation-in-fastpitch-2204.05753</loc><lastmod>2022-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-pitch-controllability-using-timbre-preserving-pitch-augmentation-in-fastpitch-2204.05753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-pitch-controllability-using-timbre-preserving-pitch-augmentation-in-fastpitch-2204.05753"/></url>
<url><loc>https://scifaro.com/en/abs/voicefixer-a-unified-framework-for-high-fidelity-speech-restoration-2204.05841</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicefixer-a-unified-framework-for-high-fidelity-speech-restoration-2204.05841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicefixer-a-unified-framework-for-high-fidelity-speech-restoration-2204.05841"/></url>
<url><loc>https://scifaro.com/en/abs/a-post-auto-regressive-gan-vocoder-focused-on-spectrum-fracture-2204.06086</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-post-auto-regressive-gan-vocoder-focused-on-spectrum-fracture-2204.06086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-post-auto-regressive-gan-vocoder-focused-on-spectrum-fracture-2204.06086"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-cascaded-encoder-asr-model-for-dynamic-model-sizes-2204.06164</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-cascaded-encoder-asr-model-for-dynamic-model-sizes-2204.06164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-cascaded-encoder-asr-model-for-dynamic-model-sizes-2204.06164"/></url>
<url><loc>https://scifaro.com/en/abs/production-federated-keyword-spotting-via-distillation-filtering-and-joint-federated-centralized-training-2204.06322</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/production-federated-keyword-spotting-via-distillation-filtering-and-joint-federated-centralized-training-2204.06322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/production-federated-keyword-spotting-via-distillation-filtering-and-joint-federated-centralized-training-2204.06322"/></url>
<url><loc>https://scifaro.com/en/abs/behm-gan-bandwidth-extension-of-historical-music-using-generative-adversarial-networks-2204.06478</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/behm-gan-bandwidth-extension-of-historical-music-using-generative-adversarial-networks-2204.06478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/behm-gan-bandwidth-extension-of-historical-music-using-generative-adversarial-networks-2204.06478"/></url>
<url><loc>https://scifaro.com/en/abs/lombard-effect-for-bilingual-speakers-in-cantonese-and-english-importance-of-spectro-temporal-features-2204.06907</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lombard-effect-for-bilingual-speakers-in-cantonese-and-english-importance-of-spectro-temporal-features-2204.06907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lombard-effect-for-bilingual-speakers-in-cantonese-and-english-importance-of-spectro-temporal-features-2204.06907"/></url>
<url><loc>https://scifaro.com/en/abs/radioses-mmwave-based-audioradio-speech-enhancement-and-separation-system-2204.07092</loc><lastmod>2022-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/radioses-mmwave-based-audioradio-speech-enhancement-and-separation-system-2204.07092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/radioses-mmwave-based-audioradio-speech-enhancement-and-separation-system-2204.07092"/></url>
<url><loc>https://scifaro.com/en/abs/anomalous-sound-detection-based-on-machine-activity-detection-2204.07353</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-machine-activity-detection-2204.07353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anomalous-sound-detection-based-on-machine-activity-detection-2204.07353"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-aware-mixture-of-mixtures-training-for-weakly-supervised-speaker-extraction-2204.07375</loc><lastmod>2022-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-aware-mixture-of-mixtures-training-for-weakly-supervised-speaker-extraction-2204.07375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-aware-mixture-of-mixtures-training-for-weakly-supervised-speaker-extraction-2204.07375"/></url>
<url><loc>https://scifaro.com/en/abs/byol-for-audio-exploring-pre-trained-general-purpose-audio-representations-2204.07402</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/byol-for-audio-exploring-pre-trained-general-purpose-audio-representations-2204.07402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/byol-for-audio-exploring-pre-trained-general-purpose-audio-representations-2204.07402"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-methodology-for-considering-feasibility-and-pairwise-likelihood-in-deep-learning-based-guitar-tablature-transcription-systems-2204.08094</loc><lastmod>2022-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-methodology-for-considering-feasibility-and-pairwise-likelihood-in-deep-learning-based-guitar-tablature-transcription-systems-2204.08094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-methodology-for-considering-feasibility-and-pairwise-likelihood-in-deep-learning-based-guitar-tablature-transcription-systems-2204.08094"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-adversarial-voice-conversion-for-add-2022-2204.08692</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-adversarial-voice-conversion-for-add-2022-2204.08692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-adversarial-voice-conversion-for-add-2022-2204.08692"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deep-fake-detection-system-with-neural-stitching-for-add-2022-2204.08720</loc><lastmod>2022-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deep-fake-detection-system-with-neural-stitching-for-add-2022-2204.08720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deep-fake-detection-system-with-neural-stitching-for-add-2022-2204.08720"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2204.08765</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2204.08765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2204.08765"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-monotonic-transducers-for-large-scale-automatic-speech-recognition-2204.08858</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-monotonic-transducers-for-large-scale-automatic-speech-recognition-2204.08858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-monotonic-transducers-for-large-scale-automatic-speech-recognition-2204.08858"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-with-generative-flow-2204.09079</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-with-generative-flow-2204.09079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-with-generative-flow-2204.09079"/></url>
<url><loc>https://scifaro.com/en/abs/fastdiff-a-fast-conditional-diffusion-model-for-high-quality-speech-synthesis-2204.09934</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastdiff-a-fast-conditional-diffusion-model-for-high-quality-speech-synthesis-2204.09934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastdiff-a-fast-conditional-diffusion-model-for-high-quality-speech-synthesis-2204.09934"/></url>
<url><loc>https://scifaro.com/en/abs/cross-speaker-emotion-transfer-for-low-resource-text-to-speech-using-non-parallel-voice-conversion-with-pitch-shift-data-augmentation-2204.10020</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-for-low-resource-text-to-speech-using-non-parallel-voice-conversion-with-pitch-shift-data-augmentation-2204.10020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-speaker-emotion-transfer-for-low-resource-text-to-speech-using-non-parallel-voice-conversion-with-pitch-shift-data-augmentation-2204.10020"/></url>
<url><loc>https://scifaro.com/en/abs/gated-multimodal-fusion-with-contrastive-learning-for-turn-taking-prediction-in-human-robot-dialogue-2204.10172</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gated-multimodal-fusion-with-contrastive-learning-for-turn-taking-prediction-in-human-robot-dialogue-2204.10172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gated-multimodal-fusion-with-contrastive-learning-for-turn-taking-prediction-in-human-robot-dialogue-2204.10172"/></url>
<url><loc>https://scifaro.com/en/abs/the-nist-cts-speaker-recognition-challenge-2204.10228</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nist-cts-speaker-recognition-challenge-2204.10228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nist-cts-speaker-recognition-challenge-2204.10228"/></url>
<url><loc>https://scifaro.com/en/abs/the-2021-nist-speaker-recognition-evaluation-2204.10242</loc><lastmod>2022-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-2021-nist-speaker-recognition-evaluation-2204.10242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-2021-nist-speaker-recognition-evaluation-2204.10242"/></url>
<url><loc>https://scifaro.com/en/abs/improving-self-supervised-learning-based-mos-prediction-networks-2204.11030</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-self-supervised-learning-based-mos-prediction-networks-2204.11030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-self-supervised-learning-based-mos-prediction-networks-2204.11030"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-separation-consistency-training-for-adaptation-of-unsupervised-speech-separation-2204.11032</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-separation-consistency-training-for-adaptation-of-unsupervised-speech-separation-2204.11032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-separation-consistency-training-for-adaptation-of-unsupervised-speech-separation-2204.11032"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-speaker-identification-using-depthwise-separable-convolutional-network-with-channel-attention-2204.11180</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-speaker-identification-using-depthwise-separable-convolutional-network-with-channel-attention-2204.11180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-speaker-identification-using-depthwise-separable-convolutional-network-with-channel-attention-2204.11180"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-naturalness-of-simulated-conversations-for-end-to-end-neural-diarization-2204.11232</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-naturalness-of-simulated-conversations-for-end-to-end-neural-diarization-2204.11232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-naturalness-of-simulated-conversations-for-end-to-end-neural-diarization-2204.11232"/></url>
<url><loc>https://scifaro.com/en/abs/improved-far-field-speech-recognition-using-joint-variational-autoencoder-2204.11286</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-far-field-speech-recognition-using-joint-variational-autoencoder-2204.11286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-far-field-speech-recognition-using-joint-variational-autoencoder-2204.11286"/></url>
<url><loc>https://scifaro.com/en/abs/graph-convolutional-network-based-semi-supervised-learning-on-multi-speaker-meeting-data-2204.11501</loc><lastmod>2022-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-convolutional-network-based-semi-supervised-learning-on-multi-speaker-meeting-data-2204.11501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-convolutional-network-based-semi-supervised-learning-on-multi-speaker-meeting-data-2204.11501"/></url>
<url><loc>https://scifaro.com/en/abs/cleanformer-a-multichannel-array-configuration-invariant-neural-enhancement-frontend-for-asr-in-smart-speakers-2204.11933</loc><lastmod>2023-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cleanformer-a-multichannel-array-configuration-invariant-neural-enhancement-frontend-for-asr-in-smart-speakers-2204.11933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cleanformer-a-multichannel-array-configuration-invariant-neural-enhancement-frontend-for-asr-in-smart-speakers-2204.11933"/></url>
<url><loc>https://scifaro.com/en/abs/atst-audio-representation-learning-with-teacher-student-transformer-2204.12076</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/atst-audio-representation-learning-with-teacher-student-transformer-2204.12076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/atst-audio-representation-learning-with-teacher-student-transformer-2204.12076"/></url>
<url><loc>https://scifaro.com/en/abs/mask-scalar-prediction-for-improving-robust-automatic-speech-recognition-2204.12092</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-scalar-prediction-for-improving-robust-automatic-speech-recognition-2204.12092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-scalar-prediction-for-improving-robust-automatic-speech-recognition-2204.12092"/></url>
<url><loc>https://scifaro.com/en/abs/masked-spectrogram-modeling-using-masked-autoencoders-for-learning-general-purpose-audio-representation-2204.12260</loc><lastmod>2023-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-spectrogram-modeling-using-masked-autoencoders-for-learning-general-purpose-audio-representation-2204.12260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-spectrogram-modeling-using-masked-autoencoders-for-learning-general-purpose-audio-representation-2204.12260"/></url>
<url><loc>https://scifaro.com/en/abs/low-dimensional-representation-of-infant-and-adult-vocalization-acoustics-2204.12279</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-dimensional-representation-of-infant-and-adult-vocalization-acoustics-2204.12279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-dimensional-representation-of-infant-and-adult-vocalization-acoustics-2204.12279"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-attention-in-sequence-to-sequence-models-for-speech-recognition-2204.12308</loc><lastmod>2022-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-attention-in-sequence-to-sequence-models-for-speech-recognition-2204.12308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-attention-in-sequence-to-sequence-models-for-speech-recognition-2204.12308"/></url>
<url><loc>https://scifaro.com/en/abs/study-on-the-fairness-of-speaker-verification-systems-on-underrepresented-accents-in-english-2204.12649</loc><lastmod>2025-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-on-the-fairness-of-speaker-verification-systems-on-underrepresented-accents-in-english-2204.12649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-on-the-fairness-of-speaker-verification-systems-on-underrepresented-accents-in-english-2204.12649"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-fast-speech-separation-model-with-teacher-student-learning-2204.12777</loc><lastmod>2022-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-fast-speech-separation-model-with-teacher-student-learning-2204.12777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-fast-speech-separation-model-with-teacher-student-learning-2204.12777"/></url>
<url><loc>https://scifaro.com/en/abs/autonomous-in-situ-soundscape-augmentation-via-joint-selection-of-masker-and-gain-2204.13883</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autonomous-in-situ-soundscape-augmentation-via-joint-selection-of-masker-and-gain-2204.13883"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autonomous-in-situ-soundscape-augmentation-via-joint-selection-of-masker-and-gain-2204.13883"/></url>
<url><loc>https://scifaro.com/en/abs/deployment-of-an-iot-system-for-adaptive-in-situ-soundscape-augmentation-2204.13890</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deployment-of-an-iot-system-for-adaptive-in-situ-soundscape-augmentation-2204.13890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deployment-of-an-iot-system-for-adaptive-in-situ-soundscape-augmentation-2204.13890"/></url>
<url><loc>https://scifaro.com/en/abs/baselines-and-protocols-for-household-speaker-recognition-2205.00288</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/baselines-and-protocols-for-household-speaker-recognition-2205.00288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/baselines-and-protocols-for-household-speaker-recognition-2205.00288"/></url>
<url><loc>https://scifaro.com/en/abs/a-meeting-transcription-system-for-an-ad-hoc-acoustic-sensor-network-2205.00944</loc><lastmod>2022-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-meeting-transcription-system-for-an-ad-hoc-acoustic-sensor-network-2205.00944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-meeting-transcription-system-for-an-ad-hoc-acoustic-sensor-network-2205.00944"/></url>
<url><loc>https://scifaro.com/en/abs/improving-dual-microphone-speech-enhancement-by-learning-cross-channel-features-with-multi-head-attention-2205.01280</loc><lastmod>2022-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-dual-microphone-speech-enhancement-by-learning-cross-channel-features-with-multi-head-attention-2205.01280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-dual-microphone-speech-enhancement-by-learning-cross-channel-features-with-multi-head-attention-2205.01280"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-dynamic-filter-for-robust-and-low-computational-feature-extraction-2205.01304</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-dynamic-filter-for-robust-and-low-computational-feature-extraction-2205.01304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-dynamic-filter-for-robust-and-low-computational-feature-extraction-2205.01304"/></url>
<url><loc>https://scifaro.com/en/abs/attentive-activation-function-for-improving-end-to-end-spoofing-countermeasure-systems-2205.01528</loc><lastmod>2022-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentive-activation-function-for-improving-end-to-end-spoofing-countermeasure-systems-2205.01528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentive-activation-function-for-improving-end-to-end-spoofing-countermeasure-systems-2205.01528"/></url>
<url><loc>https://scifaro.com/en/abs/the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2205.01780</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2205.01780"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-icml-2022-expressive-vocalizations-workshop-and-competition-recognizing-generating-and-personalizing-vocal-bursts-2205.01780"/></url>
<url><loc>https://scifaro.com/en/abs/virtual-analog-modeling-of-distortion-circuits-using-neural-ordinary-differential-equations-2205.01897</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/virtual-analog-modeling-of-distortion-circuits-using-neural-ordinary-differential-equations-2205.01897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/virtual-analog-modeling-of-distortion-circuits-using-neural-ordinary-differential-equations-2205.01897"/></url>
<url><loc>https://scifaro.com/en/abs/does-a-pesqnet-loss-require-a-clean-reference-input-the-original-pesq-does-but-acr-listening-tests-don-t-2205.02085</loc><lastmod>2022-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-a-pesqnet-loss-require-a-clean-reference-input-the-original-pesq-does-but-acr-listening-tests-don-t-2205.02085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-a-pesqnet-loss-require-a-clean-reference-input-the-original-pesq-does-but-acr-listening-tests-don-t-2205.02085"/></url>
<url><loc>https://scifaro.com/en/abs/region-to-region-kernel-interpolation-of-acoustic-transfer-function-with-directional-weighting-2205.02750</loc><lastmod>2022-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/region-to-region-kernel-interpolation-of-acoustic-transfer-function-with-directional-weighting-2205.02750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/region-to-region-kernel-interpolation-of-acoustic-transfer-function-with-directional-weighting-2205.02750"/></url>
<url><loc>https://scifaro.com/en/abs/a-conformer-based-waveform-domain-neural-acoustic-echo-canceller-optimized-for-asr-accuracy-2205.03481</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-conformer-based-waveform-domain-neural-acoustic-echo-canceller-optimized-for-asr-accuracy-2205.03481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-conformer-based-waveform-domain-neural-acoustic-echo-canceller-optimized-for-asr-accuracy-2205.03481"/></url>
<url><loc>https://scifaro.com/en/abs/mask-based-neural-beamforming-for-moving-speakers-with-self-attention-based-tracking-2205.03568</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-based-neural-beamforming-for-moving-speakers-with-self-attention-based-tracking-2205.03568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-based-neural-beamforming-for-moving-speakers-with-self-attention-based-tracking-2205.03568"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-echo-suppression-using-a-learning-based-multi-frame-minimum-variance-distortionless-response-filter-2205.03594</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-echo-suppression-using-a-learning-based-multi-frame-minimum-variance-distortionless-response-filter-2205.03594"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-echo-suppression-using-a-learning-based-multi-frame-minimum-variance-distortionless-response-filter-2205.03594"/></url>
<url><loc>https://scifaro.com/en/abs/recab-vae-gumbel-softmax-variational-inference-based-on-analytic-divergence-2205.04104</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recab-vae-gumbel-softmax-variational-inference-based-on-analytic-divergence-2205.04104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recab-vae-gumbel-softmax-variational-inference-based-on-analytic-divergence-2205.04104"/></url>
<url><loc>https://scifaro.com/en/abs/bandwidth-scalable-fully-mask-based-deep-fcrn-acoustic-echo-cancellation-and-postfiltering-2205.04276</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bandwidth-scalable-fully-mask-based-deep-fcrn-acoustic-echo-cancellation-and-postfiltering-2205.04276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bandwidth-scalable-fully-mask-based-deep-fcrn-acoustic-echo-cancellation-and-postfiltering-2205.04276"/></url>
<url><loc>https://scifaro.com/en/abs/naturalspeech-end-to-end-text-to-speech-synthesis-with-human-level-quality-2205.04421</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturalspeech-end-to-end-text-to-speech-synthesis-with-human-level-quality-2205.04421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturalspeech-end-to-end-text-to-speech-synthesis-with-human-level-quality-2205.04421"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-reinforcement-using-target-source-extraction-for-robust-automatic-speech-recognition-2205.04433</loc><lastmod>2022-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-reinforcement-using-target-source-extraction-for-robust-automatic-speech-recognition-2205.04433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-reinforcement-using-target-source-extraction-for-robust-automatic-speech-recognition-2205.04433"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-enabled-semantic-communications-with-speech-recognition-and-synthesis-2205.04603</loc><lastmod>2023-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-enabled-semantic-communications-with-speech-recognition-and-synthesis-2205.04603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-enabled-semantic-communications-with-speech-recognition-and-synthesis-2205.04603"/></url>
<url><loc>https://scifaro.com/en/abs/preliminary-assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2205.04728</loc><lastmod>2022-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preliminary-assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2205.04728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preliminary-assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2205.04728"/></url>
<url><loc>https://scifaro.com/en/abs/separator-transducer-segmenter-streaming-recognition-and-segmentation-of-multi-party-speech-2205.05199</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separator-transducer-segmenter-streaming-recognition-and-segmentation-of-multi-party-speech-2205.05199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separator-transducer-segmenter-streaming-recognition-and-segmentation-of-multi-party-speech-2205.05199"/></url>
<url><loc>https://scifaro.com/en/abs/best-of-both-worlds-multi-task-audio-visual-automatic-speech-recognition-and-active-speaker-detection-2205.05206</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/best-of-both-worlds-multi-task-audio-visual-automatic-speech-recognition-and-active-speaker-detection-2205.05206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/best-of-both-worlds-multi-task-audio-visual-automatic-speech-recognition-and-active-speaker-detection-2205.05206"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improved-zero-shot-voice-conversion-with-conditional-dsvae-2205.05227</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improved-zero-shot-voice-conversion-with-conditional-dsvae-2205.05227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improved-zero-shot-voice-conversion-with-conditional-dsvae-2205.05227"/></url>
<url><loc>https://scifaro.com/en/abs/deepfilternet2-towards-real-time-speech-enhancement-on-embedded-devices-for-full-band-audio-2205.05474</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfilternet2-towards-real-time-speech-enhancement-on-embedded-devices-for-full-band-audio-2205.05474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfilternet2-towards-real-time-speech-enhancement-on-embedded-devices-for-full-band-audio-2205.05474"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-griffin-lim-improved-iterative-phase-retrieval-for-speech-2205.05496</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-griffin-lim-improved-iterative-phase-retrieval-for-speech-2205.05496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-griffin-lim-improved-iterative-phase-retrieval-for-speech-2205.05496"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-representation-learning-speech-enhancement-method-using-beta-vae-2205.05581</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-representation-learning-speech-enhancement-method-using-beta-vae-2205.05581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-representation-learning-speech-enhancement-method-using-beta-vae-2205.05581"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-multi-person-audio-visual-automatic-speech-recognition-2205.05586</loc><lastmod>2022-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-multi-person-audio-visual-automatic-speech-recognition-2205.05586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-multi-person-audio-visual-automatic-speech-recognition-2205.05586"/></url>
<url><loc>https://scifaro.com/en/abs/a-closer-look-at-audio-visual-multi-person-speech-recognition-and-active-speaker-selection-2205.05684</loc><lastmod>2022-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-closer-look-at-audio-visual-multi-person-speech-recognition-and-active-speaker-selection-2205.05684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-closer-look-at-audio-visual-multi-person-speech-recognition-and-active-speaker-selection-2205.05684"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-packet-loss-concealment-with-mixed-generative-and-predictive-model-2205.05785</loc><lastmod>2022-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-packet-loss-concealment-with-mixed-generative-and-predictive-model-2205.05785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-packet-loss-concealment-with-mixed-generative-and-predictive-model-2205.05785"/></url>
<url><loc>https://scifaro.com/en/abs/automated-audio-captioning-an-overview-of-recent-progress-and-new-challenges-2205.05949</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-audio-captioning-an-overview-of-recent-progress-and-new-challenges-2205.05949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-audio-captioning-an-overview-of-recent-progress-and-new-challenges-2205.05949"/></url>
<url><loc>https://scifaro.com/en/abs/training-strategies-for-own-voice-reconstruction-in-hearing-protection-devices-using-an-in-ear-microphone-2205.06157</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-strategies-for-own-voice-reconstruction-in-hearing-protection-devices-using-an-in-ear-microphone-2205.06157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-strategies-for-own-voice-reconstruction-in-hearing-protection-devices-using-an-in-ear-microphone-2205.06157"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-adversarial-data-augmentation-for-dysarthric-and-elderly-speech-recognition-2205.06445</loc><lastmod>2025-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-adversarial-data-augmentation-for-dysarthric-and-elderly-speech-recognition-2205.06445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-adversarial-data-augmentation-for-dysarthric-and-elderly-speech-recognition-2205.06445"/></url>
<url><loc>https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-blind-source-extraction-based-on-independent-vector-extraction-2205.06473</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-blind-source-extraction-based-on-independent-vector-extraction-2205.06473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-acoustic-echo-cancellation-and-blind-source-extraction-based-on-independent-vector-extraction-2205.06473"/></url>
<url><loc>https://scifaro.com/en/abs/task-splitting-for-dnn-based-acoustic-echo-and-noise-removal-2205.06931</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-splitting-for-dnn-based-acoustic-echo-and-noise-removal-2205.06931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-splitting-for-dnn-based-acoustic-echo-and-noise-removal-2205.06931"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-approaches-for-spoken-language-recognition-taltech-submission-to-the-olr-2021-challenge-2205.07083</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-approaches-for-spoken-language-recognition-taltech-submission-to-the-olr-2021-challenge-2205.07083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-approaches-for-spoken-language-recognition-taltech-submission-to-the-olr-2021-challenge-2205.07083"/></url>
<url><loc>https://scifaro.com/en/abs/collar-aware-training-for-streaming-speaker-change-detection-in-broadcast-speech-2205.07086</loc><lastmod>2022-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collar-aware-training-for-streaming-speaker-change-detection-in-broadcast-speech-2205.07086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collar-aware-training-for-streaming-speaker-change-detection-in-broadcast-speech-2205.07086"/></url>
<url><loc>https://scifaro.com/en/abs/learning-lip-based-audio-visual-speaker-embeddings-with-av-hubert-2205.07180</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-lip-based-audio-visual-speaker-embeddings-with-av-hubert-2205.07180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-lip-based-audio-visual-speaker-embeddings-with-av-hubert-2205.07180"/></url>
<url><loc>https://scifaro.com/en/abs/generspeech-towards-style-transfer-for-generalizable-out-of-domain-text-to-speech-2205.07211</loc><lastmod>2022-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generspeech-towards-style-transfer-for-generalizable-out-of-domain-text-to-speech-2205.07211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generspeech-towards-style-transfer-for-generalizable-out-of-domain-text-to-speech-2205.07211"/></url>
<url><loc>https://scifaro.com/en/abs/learning-representations-for-new-sound-classes-with-continual-self-supervised-learning-2205.07390</loc><lastmod>2023-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-representations-for-new-sound-classes-with-continual-self-supervised-learning-2205.07390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-representations-for-new-sound-classes-with-continual-self-supervised-learning-2205.07390"/></url>
<url><loc>https://scifaro.com/en/abs/accented-speech-recognition-benchmarking-pre-training-and-diverse-data-2205.08014</loc><lastmod>2022-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accented-speech-recognition-benchmarking-pre-training-and-diverse-data-2205.08014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accented-speech-recognition-benchmarking-pre-training-and-diverse-data-2205.08014"/></url>
<url><loc>https://scifaro.com/en/abs/composing-general-audio-representation-by-fusing-multilayer-features-of-a-pre-trained-model-2205.08138</loc><lastmod>2022-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/composing-general-audio-representation-by-fusing-multilayer-features-of-a-pre-trained-model-2205.08138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/composing-general-audio-representation-by-fusing-multilayer-features-of-a-pre-trained-model-2205.08138"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-noise-context-aware-enhancement-for-automatic-speech-recognition-in-multi-talker-environments-2205.08555</loc><lastmod>2022-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-noise-context-aware-enhancement-for-automatic-speech-recognition-in-multi-talker-environments-2205.08555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-noise-context-aware-enhancement-for-automatic-speech-recognition-in-multi-talker-environments-2205.08555"/></url>
<url><loc>https://scifaro.com/en/abs/u-former-improving-monaural-speech-enhancement-with-multi-head-self-and-cross-attention-2205.08681</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u-former-improving-monaural-speech-enhancement-with-multi-head-self-and-cross-attention-2205.08681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u-former-improving-monaural-speech-enhancement-with-multi-head-self-and-cross-attention-2205.08681"/></url>
<url><loc>https://scifaro.com/en/abs/3d-single-source-localization-based-on-euclidean-distance-matrices-2205.08960</loc><lastmod>2022-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-single-source-localization-based-on-euclidean-distance-matrices-2205.08960"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-single-source-localization-based-on-euclidean-distance-matrices-2205.08960"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-binaural-noise-reduction-2205.08983</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-binaural-noise-reduction-2205.08983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multi-frame-mvdr-filtering-for-binaural-noise-reduction-2205.08983"/></url>
<url><loc>https://scifaro.com/en/abs/coherence-based-frequency-subset-selection-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2205.08985</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coherence-based-frequency-subset-selection-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2205.08985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coherence-based-frequency-subset-selection-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2205.08985"/></url>
<url><loc>https://scifaro.com/en/abs/dictionary-based-fusion-of-contact-and-acoustic-microphones-for-wind-noise-reduction-2205.09017</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dictionary-based-fusion-of-contact-and-acoustic-microphones-for-wind-noise-reduction-2205.09017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dictionary-based-fusion-of-contact-and-acoustic-microphones-for-wind-noise-reduction-2205.09017"/></url>
<url><loc>https://scifaro.com/en/abs/macedonian-speech-synthesis-for-assistive-technology-applications-2205.09198</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/macedonian-speech-synthesis-for-assistive-technology-applications-2205.09198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/macedonian-speech-synthesis-for-assistive-technology-applications-2205.09198"/></url>
<url><loc>https://scifaro.com/en/abs/bias-analysis-of-spatial-coherence-based-rtf-vector-estimation-for-acoustic-sensor-networks-in-a-diffuse-sound-field-2205.09401</loc><lastmod>2023-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bias-analysis-of-spatial-coherence-based-rtf-vector-estimation-for-acoustic-sensor-networks-in-a-diffuse-sound-field-2205.09401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bias-analysis-of-spatial-coherence-based-rtf-vector-estimation-for-acoustic-sensor-networks-in-a-diffuse-sound-field-2205.09401"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-for-multi-exponential-sound-energy-decay-analysis-2205.09644</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-for-multi-exponential-sound-energy-decay-analysis-2205.09644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-for-multi-exponential-sound-energy-decay-analysis-2205.09644"/></url>
<url><loc>https://scifaro.com/en/abs/bi-lstm-scoring-based-similarity-measurement-with-agglomerative-hierarchical-clustering-ahc-for-speaker-diarization-2205.09709</loc><lastmod>2022-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bi-lstm-scoring-based-similarity-measurement-with-agglomerative-hierarchical-clustering-ahc-for-speaker-diarization-2205.09709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bi-lstm-scoring-based-similarity-measurement-with-agglomerative-hierarchical-clustering-ahc-for-speaker-diarization-2205.09709"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-zero-shot-voice-conversion-with-location-variable-convolutions-2205.09784</loc><lastmod>2024-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-zero-shot-voice-conversion-with-location-variable-convolutions-2205.09784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-zero-shot-voice-conversion-with-location-variable-convolutions-2205.09784"/></url>
<url><loc>https://scifaro.com/en/abs/voice-activity-projection-self-supervised-learning-of-turn-taking-events-2205.09812</loc><lastmod>2022-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-activity-projection-self-supervised-learning-of-turn-taking-events-2205.09812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-activity-projection-self-supervised-learning-of-turn-taking-events-2205.09812"/></url>
<url><loc>https://scifaro.com/en/abs/content-context-factorized-representations-for-automated-speech-recognition-2205.09872</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/content-context-factorized-representations-for-automated-speech-recognition-2205.09872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/content-context-factorized-representations-for-automated-speech-recognition-2205.09872"/></url>
<url><loc>https://scifaro.com/en/abs/audio-declipping-with-weighted-analysis-social-sparsity-2205.10215</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-declipping-with-weighted-analysis-social-sparsity-2205.10215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-declipping-with-weighted-analysis-social-sparsity-2205.10215"/></url>
<url><loc>https://scifaro.com/en/abs/neuralecho-a-self-attentive-recurrent-neural-network-for-unified-acoustic-echo-suppression-and-speech-enhancement-2205.10401</loc><lastmod>2022-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuralecho-a-self-attentive-recurrent-neural-network-for-unified-acoustic-echo-suppression-and-speech-enhancement-2205.10401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuralecho-a-self-attentive-recurrent-neural-network-for-unified-acoustic-echo-suppression-and-speech-enhancement-2205.10401"/></url>
<url><loc>https://scifaro.com/en/abs/sepit-approaching-a-single-channel-speech-separation-bound-2205.11801</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sepit-approaching-a-single-channel-speech-separation-bound-2205.11801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sepit-approaching-a-single-channel-speech-separation-bound-2205.11801"/></url>
<url><loc>https://scifaro.com/en/abs/paddlespeech-an-easy-to-use-all-in-one-speech-toolkit-2205.12007</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paddlespeech-an-easy-to-use-all-in-one-speech-toolkit-2205.12007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paddlespeech-an-easy-to-use-all-in-one-speech-toolkit-2205.12007"/></url>
<url><loc>https://scifaro.com/en/abs/defending-a-music-recommender-against-hubness-based-adversarial-attacks-2205.12032</loc><lastmod>2022-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/defending-a-music-recommender-against-hubness-based-adversarial-attacks-2205.12032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/defending-a-music-recommender-against-hubness-based-adversarial-attacks-2205.12032"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-on-applying-acoustic-feature-conversion-to-asr-of-adult-and-child-speech-2205.12477</loc><lastmod>2022-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-on-applying-acoustic-feature-conversion-to-asr-of-adult-and-child-speech-2205.12477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-on-applying-acoustic-feature-conversion-to-asr-of-adult-and-child-speech-2205.12477"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-preserved-communication-system-for-highly-efficient-speech-transmission-2205.12727</loc><lastmod>2022-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-preserved-communication-system-for-highly-efficient-speech-transmission-2205.12727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-preserved-communication-system-for-highly-efficient-speech-transmission-2205.12727"/></url>
<url><loc>https://scifaro.com/en/abs/synthesis-of-soundfields-through-irregular-loudspeaker-arrays-based-on-convolutional-neural-networks-2205.12872</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesis-of-soundfields-through-irregular-loudspeaker-arrays-based-on-convolutional-neural-networks-2205.12872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesis-of-soundfields-through-irregular-loudspeaker-arrays-based-on-convolutional-neural-networks-2205.12872"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-tail-neural-network-for-realtime-custom-keyword-spotting-2205.12933</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-tail-neural-network-for-realtime-custom-keyword-spotting-2205.12933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-tail-neural-network-for-realtime-custom-keyword-spotting-2205.12933"/></url>
<url><loc>https://scifaro.com/en/abs/audio-data-augmentation-for-acoustic-to-articulatory-speech-inversion-using-bidirectional-gated-rnns-2205.13086</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-data-augmentation-for-acoustic-to-articulatory-speech-inversion-using-bidirectional-gated-rnns-2205.13086"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-data-augmentation-for-acoustic-to-articulatory-speech-inversion-using-bidirectional-gated-rnns-2205.13086"/></url>
<url><loc>https://scifaro.com/en/abs/joint-training-of-speech-enhancement-and-self-supervised-model-for-noise-robust-asr-2205.13293</loc><lastmod>2022-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-training-of-speech-enhancement-and-self-supervised-model-for-noise-robust-asr-2205.13293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-training-of-speech-enhancement-and-self-supervised-model-for-noise-robust-asr-2205.13293"/></url>
<url><loc>https://scifaro.com/en/abs/an-enhanced-conv-tasnet-model-for-speech-separation-using-a-speaker-distance-based-loss-function-2205.13657</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-enhanced-conv-tasnet-model-for-speech-separation-using-a-speaker-distance-based-loss-function-2205.13657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-enhanced-conv-tasnet-model-for-speech-separation-using-a-speaker-distance-based-loss-function-2205.13657"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-with-multi-task-learning-2205.13755</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-with-multi-task-learning-2205.13755"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-with-multi-task-learning-2205.13755"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-conditioning-single-channel-target-speaker-extraction-using-conformer-based-architectures-2205.13851</loc><lastmod>2022-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-conditioning-single-channel-target-speaker-extraction-using-conformer-based-architectures-2205.13851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-conditioning-single-channel-target-speaker-extraction-using-conformer-based-architectures-2205.13851"/></url>
<url><loc>https://scifaro.com/en/abs/deep-representation-decomposition-for-rate-invariant-speaker-verification-2205.14294</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-representation-decomposition-for-rate-invariant-speaker-verification-2205.14294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-representation-decomposition-for-rate-invariant-speaker-verification-2205.14294"/></url>
<url><loc>https://scifaro.com/en/abs/to-catch-a-chorus-verse-intro-or-anything-else-analyzing-a-song-with-structural-functions-2205.14700</loc><lastmod>2022-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-catch-a-chorus-verse-intro-or-anything-else-analyzing-a-song-with-structural-functions-2205.14700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-catch-a-chorus-verse-intro-or-anything-else-analyzing-a-song-with-structural-functions-2205.14700"/></url>
<url><loc>https://scifaro.com/en/abs/binauralgrad-a-two-stage-conditional-diffusion-probabilistic-model-for-binaural-audio-synthesis-2205.14807</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binauralgrad-a-two-stage-conditional-diffusion-probabilistic-model-for-binaural-audio-synthesis-2205.14807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binauralgrad-a-two-stage-conditional-diffusion-probabilistic-model-for-binaural-audio-synthesis-2205.14807"/></url>
<url><loc>https://scifaro.com/en/abs/styletts-a-style-based-generative-model-for-natural-and-diverse-text-to-speech-synthesis-2205.15439</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styletts-a-style-based-generative-model-for-natural-and-diverse-text-to-speech-synthesis-2205.15439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styletts-a-style-based-generative-model-for-natural-and-diverse-text-to-speech-synthesis-2205.15439"/></url>
<url><loc>https://scifaro.com/en/abs/conversational-speech-separation-an-evaluation-study-for-streaming-applications-2205.15700</loc><lastmod>2022-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conversational-speech-separation-an-evaluation-study-for-streaming-applications-2205.15700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conversational-speech-separation-an-evaluation-study-for-streaming-applications-2205.15700"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-synthesis-based-data-augmentation-for-code-switched-spoken-language-identification-2205.15747</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-synthesis-based-data-augmentation-for-code-switched-spoken-language-identification-2205.15747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-synthesis-based-data-augmentation-for-code-switched-spoken-language-identification-2205.15747"/></url>
<url><loc>https://scifaro.com/en/abs/squeezeformer-an-efficient-transformer-for-automatic-speech-recognition-2206.00888</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/squeezeformer-an-efficient-transformer-for-automatic-speech-recognition-2206.00888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/squeezeformer-an-efficient-transformer-for-automatic-speech-recognition-2206.00888"/></url>
<url><loc>https://scifaro.com/en/abs/pronunciation-dictionary-free-multilingual-speech-synthesis-by-combining-unsupervised-and-supervised-phonetic-representations-2206.00951</loc><lastmod>2022-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pronunciation-dictionary-free-multilingual-speech-synthesis-by-combining-unsupervised-and-supervised-phonetic-representations-2206.00951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pronunciation-dictionary-free-multilingual-speech-synthesis-by-combining-unsupervised-and-supervised-phonetic-representations-2206.00951"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-audio-visual-data-using-spatial-alignment-2206.00970</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-audio-visual-data-using-spatial-alignment-2206.00970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-from-audio-visual-data-using-spatial-alignment-2206.00970"/></url>
<url><loc>https://scifaro.com/en/abs/snow-mountain-dataset-of-audio-recordings-of-the-bible-in-low-resource-languages-2206.01205</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snow-mountain-dataset-of-audio-recordings-of-the-bible-in-low-resource-languages-2206.01205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snow-mountain-dataset-of-audio-recordings-of-the-bible-in-low-resource-languages-2206.01205"/></url>
<url><loc>https://scifaro.com/en/abs/starss22-a-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2206.01948</loc><lastmod>2022-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/starss22-a-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2206.01948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/starss22-a-dataset-of-spatial-recordings-of-real-scenes-with-spatiotemporal-annotations-of-sound-events-2206.01948"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-frequency-independent-dialogue-separation-2206.02124</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-frequency-independent-dialogue-separation-2206.02124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-frequency-independent-dialogue-separation-2206.02124"/></url>
<url><loc>https://scifaro.com/en/abs/geometrically-motivated-primary-ambient-decomposition-with-center-channel-extraction-2206.02125</loc><lastmod>2022-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/geometrically-motivated-primary-ambient-decomposition-with-center-channel-extraction-2206.02125"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/geometrically-motivated-primary-ambient-decomposition-with-center-channel-extraction-2206.02125"/></url>
<url><loc>https://scifaro.com/en/abs/dict-tts-learning-to-pronounce-with-prior-dictionary-knowledge-for-text-to-speech-2206.02147</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dict-tts-learning-to-pronounce-with-prior-dictionary-knowledge-for-text-to-speech-2206.02147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dict-tts-learning-to-pronounce-with-prior-dictionary-knowledge-for-text-to-speech-2206.02147"/></url>
<url><loc>https://scifaro.com/en/abs/online-neural-diarization-of-unlimited-numbers-of-speakers-using-global-and-local-attractors-2206.02432</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-neural-diarization-of-unlimited-numbers-of-speakers-using-global-and-local-attractors-2206.02432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-neural-diarization-of-unlimited-numbers-of-speakers-using-global-and-local-attractors-2206.02432"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-tts-acoustic-modeling-for-tts-with-conditional-disentangled-sequential-vae-2206.02512</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-tts-acoustic-modeling-for-tts-with-conditional-disentangled-sequential-vae-2206.02512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-tts-acoustic-modeling-for-tts-with-conditional-disentangled-sequential-vae-2206.02512"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-time-analog-filters-for-audio-edge-intelligence-review-on-circuit-designs-2206.02639</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-time-analog-filters-for-audio-edge-intelligence-review-on-circuit-designs-2206.02639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-time-analog-filters-for-audio-edge-intelligence-review-on-circuit-designs-2206.02639"/></url>
<url><loc>https://scifaro.com/en/abs/fednst-federated-noisy-student-training-for-automatic-speech-recognition-2206.02797</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fednst-federated-noisy-student-training-for-automatic-speech-recognition-2206.02797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fednst-federated-noisy-student-training-for-automatic-speech-recognition-2206.02797"/></url>
<url><loc>https://scifaro.com/en/abs/flexlip-a-controllable-text-to-lip-system-2206.03206</loc><lastmod>2022-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexlip-a-controllable-text-to-lip-system-2206.03206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexlip-a-controllable-text-to-lip-system-2206.03206"/></url>
<url><loc>https://scifaro.com/en/abs/the-influence-of-dataset-partitioning-on-dysfluency-detection-systems-2206.03400</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-influence-of-dataset-partitioning-on-dysfluency-detection-systems-2206.03400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-influence-of-dataset-partitioning-on-dysfluency-detection-systems-2206.03400"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-in-dcase-2022-challenge-2206.03835</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-in-dcase-2022-challenge-2206.03835"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-in-dcase-2022-challenge-2206.03835"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-integration-of-acoustics-and-lidar-a-multi-modal-approach-to-acoustic-reflector-estimation-2206.03885</loc><lastmod>2022-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-integration-of-acoustics-and-lidar-a-multi-modal-approach-to-acoustic-reflector-estimation-2206.03885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-integration-of-acoustics-and-lidar-a-multi-modal-approach-to-acoustic-reflector-estimation-2206.03885"/></url>
<url><loc>https://scifaro.com/en/abs/context-based-out-of-vocabulary-word-recovery-for-asr-systems-in-indian-languages-2206.04305</loc><lastmod>2022-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-based-out-of-vocabulary-word-recovery-for-asr-systems-in-indian-languages-2206.04305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-based-out-of-vocabulary-word-recovery-for-asr-systems-in-indian-languages-2206.04305"/></url>
<url><loc>https://scifaro.com/en/abs/feature-informed-embedding-space-regularization-for-audio-classification-2206.04850</loc><lastmod>2022-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-informed-embedding-space-regularization-for-audio-classification-2206.04850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-informed-embedding-space-regularization-for-audio-classification-2206.04850"/></url>
<url><loc>https://scifaro.com/en/abs/svadhyaya-system-for-the-second-diagnosing-covid-19-using-acoustics-challenge-2021-2206.05462</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svadhyaya-system-for-the-second-diagnosing-covid-19-using-acoustics-challenge-2021-2206.05462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svadhyaya-system-for-the-second-diagnosing-covid-19-using-acoustics-challenge-2021-2206.05462"/></url>
<url><loc>https://scifaro.com/en/abs/signal-informed-dnn-based-doa-estimation-combining-an-external-microphone-and-gcc-phat-features-2206.05606</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-informed-dnn-based-doa-estimation-combining-an-external-microphone-and-gcc-phat-features-2206.05606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-informed-dnn-based-doa-estimation-combining-an-external-microphone-and-gcc-phat-features-2206.05606"/></url>
<url><loc>https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2206.06108</loc><lastmod>2022-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2206.06108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2206.06108"/></url>
<url><loc>https://scifaro.com/en/abs/ambisep-ambisonic-to-ambisonic-reverberant-speech-separation-using-transformer-networks-2206.06184</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisep-ambisonic-to-ambisonic-reverberant-speech-separation-using-transformer-networks-2206.06184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisep-ambisonic-to-ambisonic-reverberant-speech-separation-using-transformer-networks-2206.06184"/></url>
<url><loc>https://scifaro.com/en/abs/toward-zero-oracle-word-error-rate-on-the-switchboard-benchmark-2206.06192</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-zero-oracle-word-error-rate-on-the-switchboard-benchmark-2206.06192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-zero-oracle-word-error-rate-on-the-switchboard-benchmark-2206.06192"/></url>
<url><loc>https://scifaro.com/en/abs/automated-evaluation-of-standardized-dementia-screening-tests-2206.06208</loc><lastmod>2022-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-evaluation-of-standardized-dementia-screening-tests-2206.06208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-evaluation-of-standardized-dementia-screening-tests-2206.06208"/></url>
<url><loc>https://scifaro.com/en/abs/realistic-gramophone-noise-synthesis-using-a-diffusion-model-2206.06259</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/realistic-gramophone-noise-synthesis-using-a-diffusion-model-2206.06259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/realistic-gramophone-noise-synthesis-using-a-diffusion-model-2206.06259"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-audio-synthesis-with-complex-valued-polynomial-networks-2206.06811</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-audio-synthesis-with-complex-valued-polynomial-networks-2206.06811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-audio-synthesis-with-complex-valued-polynomial-networks-2206.06811"/></url>
<url><loc>https://scifaro.com/en/abs/latency-control-for-keyword-spotting-2206.07261</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latency-control-for-keyword-spotting-2206.07261"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latency-control-for-keyword-spotting-2206.07261"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-cross-domain-and-cross-lingual-ultrasound-tongue-imaging-features-for-elderly-and-dysarthric-speech-recognition-2206.07327</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-cross-domain-and-cross-lingual-ultrasound-tongue-imaging-features-for-elderly-and-dysarthric-speech-recognition-2206.07327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-cross-domain-and-cross-lingual-ultrasound-tongue-imaging-features-for-elderly-and-dysarthric-speech-recognition-2206.07327"/></url>
<url><loc>https://scifaro.com/en/abs/residual-language-model-for-end-to-end-speech-recognition-2206.07430</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-language-model-for-end-to-end-speech-recognition-2206.07430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-language-model-for-end-to-end-speech-recognition-2206.07430"/></url>
<url><loc>https://scifaro.com/en/abs/the-zevomos-entry-to-voicemos-challenge-2022-2206.07448</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-zevomos-entry-to-voicemos-challenge-2022-2206.07448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-zevomos-entry-to-voicemos-challenge-2022-2206.07448"/></url>
<url><loc>https://scifaro.com/en/abs/editnet-a-lightweight-network-for-unsupervised-domain-adaptation-in-speaker-verification-2206.07548</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/editnet-a-lightweight-network-for-unsupervised-domain-adaptation-in-speaker-verification-2206.07548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/editnet-a-lightweight-network-for-unsupervised-domain-adaptation-in-speaker-verification-2206.07548"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-frequency-filters-for-speech-feature-extraction-in-speaker-verification-2206.07563</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-frequency-filters-for-speech-feature-extraction-in-speaker-verification-2206.07563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-frequency-filters-for-speech-feature-extraction-in-speaker-verification-2206.07563"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-voice-conversion-with-information-perturbation-2206.07569</loc><lastmod>2022-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-voice-conversion-with-information-perturbation-2206.07569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-voice-conversion-with-information-perturbation-2206.07569"/></url>
<url><loc>https://scifaro.com/en/abs/the-scattering-transform-network-with-generalized-morse-wavelets-and-its-application-to-music-genre-classification-2206.07857</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-scattering-transform-network-with-generalized-morse-wavelets-and-its-application-to-music-genre-classification-2206.07857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-scattering-transform-network-with-generalized-morse-wavelets-and-its-application-to-music-genre-classification-2206.07857"/></url>
<url><loc>https://scifaro.com/en/abs/to-dereverb-or-not-to-dereverb-perceptual-studies-on-real-time-dereverberation-targets-2206.07917</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-dereverb-or-not-to-dereverb-perceptual-studies-on-real-time-dereverberation-targets-2206.07917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-dereverb-or-not-to-dereverb-perceptual-studies-on-real-time-dereverberation-targets-2206.07917"/></url>
<url><loc>https://scifaro.com/en/abs/draft-a-novel-framework-to-reduce-domain-shifting-in-self-supervised-learning-and-its-application-to-children-s-asr-2206.07931</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/draft-a-novel-framework-to-reduce-domain-shifting-in-self-supervised-learning-and-its-application-to-children-s-asr-2206.07931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/draft-a-novel-framework-to-reduce-domain-shifting-in-self-supervised-learning-and-its-application-to-children-s-asr-2206.07931"/></url>
<url><loc>https://scifaro.com/en/abs/a-ctc-triggered-siamese-network-with-spatial-temporal-dropout-for-speech-recognition-2206.08031</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-ctc-triggered-siamese-network-with-spatial-temporal-dropout-for-speech-recognition-2206.08031"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-ctc-triggered-siamese-network-with-spatial-temporal-dropout-for-speech-recognition-2206.08031"/></url>
<url><loc>https://scifaro.com/en/abs/nonwords-pronunciation-classification-in-language-development-tests-for-preschool-children-2206.08058</loc><lastmod>2022-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonwords-pronunciation-classification-in-language-development-tests-for-preschool-children-2206.08058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonwords-pronunciation-classification-in-language-development-tests-for-preschool-children-2206.08058"/></url>
<url><loc>https://scifaro.com/en/abs/strategies-to-improve-robustness-of-target-speech-extraction-to-enrollment-variations-2206.08174</loc><lastmod>2022-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/strategies-to-improve-robustness-of-target-speech-extraction-to-enrollment-variations-2206.08174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/strategies-to-improve-robustness-of-target-speech-extraction-to-enrollment-variations-2206.08174"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-speech-extraction-for-multiple-target-speakers-under-the-meeting-scenarios-2206.08525</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-speech-extraction-for-multiple-target-speakers-under-the-meeting-scenarios-2206.08525"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-speech-extraction-for-multiple-target-speakers-under-the-meeting-scenarios-2206.08525"/></url>
<url><loc>https://scifaro.com/en/abs/nu-wave-2-a-general-neural-audio-upsampling-model-for-various-sampling-rates-2206.08545</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nu-wave-2-a-general-neural-audio-upsampling-model-for-various-sampling-rates-2206.08545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nu-wave-2-a-general-neural-audio-upsampling-model-for-various-sampling-rates-2206.08545"/></url>
<url><loc>https://scifaro.com/en/abs/nastar-noise-adaptive-speech-enhancement-with-target-conditional-resampling-2206.09058</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nastar-noise-adaptive-speech-enhancement-with-target-conditional-resampling-2206.09058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nastar-noise-adaptive-speech-enhancement-with-target-conditional-resampling-2206.09058"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-time-domain-target-speaker-extraction-with-attention-2206.09072</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-time-domain-target-speaker-extraction-with-attention-2206.09072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-time-domain-target-speaker-extraction-with-attention-2206.09072"/></url>
<url><loc>https://scifaro.com/en/abs/decoupled-federated-learning-for-asr-with-non-iid-data-2206.09102</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupled-federated-learning-for-asr-with-non-iid-data-2206.09102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupled-federated-learning-for-asr-with-non-iid-data-2206.09102"/></url>
<url><loc>https://scifaro.com/en/abs/identifying-source-speakers-for-voice-conversion-based-spoofing-attacks-on-speaker-verification-systems-2206.09103</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identifying-source-speakers-for-voice-conversion-based-spoofing-attacks-on-speaker-verification-systems-2206.09103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identifying-source-speakers-for-voice-conversion-based-spoofing-attacks-on-speaker-verification-systems-2206.09103"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-for-robust-low-resource-children-s-speech-asr-with-transformers-and-source-filter-warping-2206.09396</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-for-robust-low-resource-children-s-speech-asr-with-transformers-and-source-filter-warping-2206.09396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-for-robust-low-resource-children-s-speech-asr-with-transformers-and-source-filter-warping-2206.09396"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-separation-transformer-2206.09507</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-separation-transformer-2206.09507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-separation-transformer-2206.09507"/></url>
<url><loc>https://scifaro.com/en/abs/towards-trustworthy-edge-intelligence-insights-from-voice-activated-services-2206.09523</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-trustworthy-edge-intelligence-insights-from-voice-activated-services-2206.09523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-trustworthy-edge-intelligence-insights-from-voice-activated-services-2206.09523"/></url>
<url><loc>https://scifaro.com/en/abs/a-step-towards-preserving-speakers-identity-while-detecting-depression-via-speaker-disentanglement-2206.09530</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-step-towards-preserving-speakers-identity-while-detecting-depression-via-speaker-disentanglement-2206.09530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-step-towards-preserving-speakers-identity-while-detecting-depression-via-speaker-disentanglement-2206.09530"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-analysis-on-the-vulnerabilities-of-end-to-end-speech-segregation-models-2206.09556</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-analysis-on-the-vulnerabilities-of-end-to-end-speech-segregation-models-2206.09556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-analysis-on-the-vulnerabilities-of-end-to-end-speech-segregation-models-2206.09556"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-end-to-end-neural-network-for-speech-enhancement-source-localization-and-voice-activity-detection-2206.09728</loc><lastmod>2022-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-end-to-end-neural-network-for-speech-enhancement-source-localization-and-voice-activity-detection-2206.09728"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-end-to-end-neural-network-for-speech-enhancement-source-localization-and-voice-activity-detection-2206.09728"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-cross-domain-speech-recognition-with-self-supervision-2206.09783</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-cross-domain-speech-recognition-with-self-supervision-2206.09783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-cross-domain-speech-recognition-with-self-supervision-2206.09783"/></url>
<url><loc>https://scifaro.com/en/abs/a-systematic-comparison-of-phonetic-aware-techniques-for-speech-enhancement-2206.11000</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-systematic-comparison-of-phonetic-aware-techniques-for-speech-enhancement-2206.11000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-systematic-comparison-of-phonetic-aware-techniques-for-speech-enhancement-2206.11000"/></url>
<url><loc>https://scifaro.com/en/abs/covyt-introducing-the-coronavirus-youtube-and-tiktok-speech-dataset-featuring-the-same-speakers-with-and-without-infection-2206.11045</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covyt-introducing-the-coronavirus-youtube-and-tiktok-speech-dataset-featuring-the-same-speakers-with-and-without-infection-2206.11045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covyt-introducing-the-coronavirus-youtube-and-tiktok-speech-dataset-featuring-the-same-speakers-with-and-without-infection-2206.11045"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-with-dual-mode-chunked-attention-for-joint-online-and-offline-asr-2206.11157</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-with-dual-mode-chunked-attention-for-joint-online-and-offline-asr-2206.11157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-with-dual-mode-chunked-attention-for-joint-online-and-offline-asr-2206.11157"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-role-of-spatial-spectral-and-temporal-processing-for-dnn-based-non-linear-multi-channel-speech-enhancement-2206.11181</loc><lastmod>2022-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-role-of-spatial-spectral-and-temporal-processing-for-dnn-based-non-linear-multi-channel-speech-enhancement-2206.11181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-role-of-spatial-spectral-and-temporal-processing-for-dnn-based-non-linear-multi-channel-speech-enhancement-2206.11181"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-multi-task-learning-for-disentangling-timbre-and-pitch-in-singing-voice-synthesis-2206.11558</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-multi-task-learning-for-disentangling-timbre-and-pitch-in-singing-voice-synthesis-2206.11558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-multi-task-learning-for-disentangling-timbre-and-pitch-in-singing-voice-synthesis-2206.11558"/></url>
<url><loc>https://scifaro.com/en/abs/two-pass-decoding-and-cross-adaptation-based-system-combination-of-end-to-end-conformer-and-hybrid-tdnn-asr-systems-2206.11596</loc><lastmod>2023-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-pass-decoding-and-cross-adaptation-based-system-combination-of-end-to-end-conformer-and-hybrid-tdnn-asr-systems-2206.11596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-pass-decoding-and-cross-adaptation-based-system-combination-of-end-to-end-conformer-and-hybrid-tdnn-asr-systems-2206.11596"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-microphone-identification-in-noisy-conditions-2206.11640</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-microphone-identification-in-noisy-conditions-2206.11640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-microphone-identification-in-noisy-conditions-2206.11640"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-dependent-sound-event-detection-for-dcase-2022-challenge-task-4-2206.11645</loc><lastmod>2022-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-dependent-sound-event-detection-for-dcase-2022-challenge-task-4-2206.11645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-dependent-sound-event-detection-for-dcase-2022-challenge-task-4-2206.11645"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-transformer-based-speech-enhancement-using-long-frames-and-stft-magnitudes-2206.11703</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-transformer-based-speech-enhancement-using-long-frames-and-stft-magnitudes-2206.11703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-transformer-based-speech-enhancement-using-long-frames-and-stft-magnitudes-2206.11703"/></url>
<url><loc>https://scifaro.com/en/abs/a-temporal-extension-of-latent-dirichlet-allocation-for-unsupervised-acoustic-unit-discovery-2206.11706</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-temporal-extension-of-latent-dirichlet-allocation-for-unsupervised-acoustic-unit-discovery-2206.11706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-temporal-extension-of-latent-dirichlet-allocation-for-unsupervised-acoustic-unit-discovery-2206.11706"/></url>
<url><loc>https://scifaro.com/en/abs/towards-end-to-end-private-automatic-speaker-recognition-2206.11750</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-end-to-end-private-automatic-speaker-recognition-2206.11750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-end-to-end-private-automatic-speaker-recognition-2206.11750"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-text-to-speech-based-on-latent-representation-of-speaking-styles-using-spontaneous-dialogue-2206.12040</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-text-to-speech-based-on-latent-representation-of-speaking-styles-using-spontaneous-dialogue-2206.12040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-text-to-speech-based-on-latent-representation-of-speaking-styles-using-spontaneous-dialogue-2206.12040"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-score-based-conformer-speaker-adaptation-for-speech-recognition-2206.12045</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-score-based-conformer-speaker-adaptation-for-speech-recognition-2206.12045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-score-based-conformer-speaker-adaptation-for-speech-recognition-2206.12045"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-and-squeeze-and-excitation-network-on-multiple-dimension-for-sound-event-localization-and-detection-in-real-scenes-2206.12059</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-and-squeeze-and-excitation-network-on-multiple-dimension-for-sound-event-localization-and-detection-in-real-scenes-2206.12059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-and-squeeze-and-excitation-network-on-multiple-dimension-for-sound-event-localization-and-detection-in-real-scenes-2206.12059"/></url>
<url><loc>https://scifaro.com/en/abs/sane-tts-stable-and-natural-end-to-end-multilingual-text-to-speech-2206.12132</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sane-tts-stable-and-natural-end-to-end-multilingual-text-to-speech-2206.12132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sane-tts-stable-and-natural-end-to-end-multilingual-text-to-speech-2206.12132"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-sound-source-localization-for-unknown-number-of-sources-2206.12273</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-sound-source-localization-for-unknown-number-of-sources-2206.12273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-sound-source-localization-for-unknown-number-of-sources-2206.12273"/></url>
<url><loc>https://scifaro.com/en/abs/open-source-objective-oriented-framework-for-head-related-transfer-function-2206.12283</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-source-objective-oriented-framework-for-head-related-transfer-function-2206.12283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-source-objective-oriented-framework-for-head-related-transfer-function-2206.12283"/></url>
<url><loc>https://scifaro.com/en/abs/speech-quality-assessment-through-mos-using-non-matching-references-2206.12285</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-quality-assessment-through-mos-using-non-matching-references-2206.12285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-quality-assessment-through-mos-using-non-matching-references-2206.12285"/></url>
<url><loc>https://scifaro.com/en/abs/saqam-spatial-audio-quality-assessment-metric-2206.12297</loc><lastmod>2022-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saqam-spatial-audio-quality-assessment-metric-2206.12297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saqam-spatial-audio-quality-assessment-metric-2206.12297"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-the-impact-of-sars-cov-2-variants-on-respiratory-sound-signals-2206.12309</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-the-impact-of-sars-cov-2-variants-on-respiratory-sound-signals-2206.12309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-the-impact-of-sars-cov-2-variants-on-respiratory-sound-signals-2206.12309"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-within-and-across-language-phoneme-recognition-performance-of-self-supervised-learning-speech-pre-trained-models-2206.12489</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-within-and-across-language-phoneme-recognition-performance-of-self-supervised-learning-speech-pre-trained-models-2206.12489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-within-and-across-language-phoneme-recognition-performance-of-self-supervised-learning-speech-pre-trained-models-2206.12489"/></url>
<url><loc>https://scifaro.com/en/abs/meta-auxiliary-learning-for-low-resource-spoken-language-understanding-2206.12774</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-auxiliary-learning-for-low-resource-spoken-language-understanding-2206.12774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-auxiliary-learning-for-low-resource-spoken-language-understanding-2206.12774"/></url>
<url><loc>https://scifaro.com/en/abs/transport-oriented-feature-aggregation-for-speaker-embedding-learning-2206.12857</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transport-oriented-feature-aggregation-for-speaker-embedding-learning-2206.12857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transport-oriented-feature-aggregation-for-speaker-embedding-learning-2206.12857"/></url>
<url><loc>https://scifaro.com/en/abs/joint-optimization-of-sampling-rate-offsets-based-on-entire-signal-relationship-among-distributed-microphones-2206.13014</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-optimization-of-sampling-rate-offsets-based-on-entire-signal-relationship-among-distributed-microphones-2206.13014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-optimization-of-sampling-rate-offsets-based-on-entire-signal-relationship-among-distributed-microphones-2206.13014"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-instance-discriminative-learning-for-depression-detection-from-speech-signals-2206.13016</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-instance-discriminative-learning-for-depression-detection-from-speech-signals-2206.13016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-instance-discriminative-learning-for-depression-detection-from-speech-signals-2206.13016"/></url>
<url><loc>https://scifaro.com/en/abs/extended-u-net-for-speaker-verification-in-noisy-environments-2206.13044</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extended-u-net-for-speaker-verification-in-noisy-environments-2206.13044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extended-u-net-for-speaker-verification-in-noisy-environments-2206.13044"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-doctored-speech-towards-an-end-to-end-parametric-learn-able-filter-approach-2206.13066</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-doctored-speech-towards-an-end-to-end-parametric-learn-able-filter-approach-2206.13066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-doctored-speech-towards-an-end-to-end-parametric-learn-able-filter-approach-2206.13066"/></url>
<url><loc>https://scifaro.com/en/abs/qbye-mlpmixer-query-by-example-open-vocabulary-keyword-spotting-using-mlpmixer-2206.13231</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qbye-mlpmixer-query-by-example-open-vocabulary-keyword-spotting-using-mlpmixer-2206.13231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qbye-mlpmixer-query-by-example-open-vocabulary-keyword-spotting-using-mlpmixer-2206.13231"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-based-elderly-speech-recognition-system-for-alzheimer-s-disease-detection-2206.13232</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-based-elderly-speech-recognition-system-for-alzheimer-s-disease-detection-2206.13232"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-based-elderly-speech-recognition-system-for-alzheimer-s-disease-detection-2206.13232"/></url>
<url><loc>https://scifaro.com/en/abs/pruned-rnn-t-for-fast-memory-efficient-asr-training-2206.13236</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pruned-rnn-t-for-fast-memory-efficient-asr-training-2206.13236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pruned-rnn-t-for-fast-memory-efficient-asr-training-2206.13236"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-baseline-for-domain-adaptation-in-end-to-end-asr-systems-using-synthetic-data-2206.13240</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-baseline-for-domain-adaptation-in-end-to-end-asr-systems-using-synthetic-data-2206.13240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-baseline-for-domain-adaptation-in-end-to-end-asr-systems-using-synthetic-data-2206.13240"/></url>
<url><loc>https://scifaro.com/en/abs/wideband-audio-waveform-evaluation-networks-efficient-accurate-estimation-of-speech-qualities-2206.13272</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wideband-audio-waveform-evaluation-networks-efficient-accurate-estimation-of-speech-qualities-2206.13272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wideband-audio-waveform-evaluation-networks-efficient-accurate-estimation-of-speech-qualities-2206.13272"/></url>
<url><loc>https://scifaro.com/en/abs/insights-into-deep-non-linear-filters-for-improved-multi-channel-speech-enhancement-2206.13310</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/insights-into-deep-non-linear-filters-for-improved-multi-channel-speech-enhancement-2206.13310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/insights-into-deep-non-linear-filters-for-improved-multi-channel-speech-enhancement-2206.13310"/></url>
<url><loc>https://scifaro.com/en/abs/interpretable-acoustic-representation-learning-on-breathing-and-speech-signals-for-covid-19-detection-2206.13365</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretable-acoustic-representation-learning-on-breathing-and-speech-signals-for-covid-19-detection-2206.13365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretable-acoustic-representation-learning-on-breathing-and-speech-signals-for-covid-19-detection-2206.13365"/></url>
<url><loc>https://scifaro.com/en/abs/avocodo-generative-adversarial-network-for-artifact-free-vocoder-2206.13404</loc><lastmod>2023-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avocodo-generative-adversarial-network-for-artifact-free-vocoder-2206.13404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avocodo-generative-adversarial-network-for-artifact-free-vocoder-2206.13404"/></url>
<url><loc>https://scifaro.com/en/abs/audio-similarity-is-unreliable-as-a-proxy-for-audio-quality-2206.13411</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-similarity-is-unreliable-as-a-proxy-for-audio-quality-2206.13411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-similarity-is-unreliable-as-a-proxy-for-audio-quality-2206.13411"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-voice-activity-detection-by-modeling-source-and-system-information-using-zero-frequency-filtering-2206.13420</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-voice-activity-detection-by-modeling-source-and-system-information-using-zero-frequency-filtering-2206.13420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-voice-activity-detection-by-modeling-source-and-system-information-using-zero-frequency-filtering-2206.13420"/></url>
<url><loc>https://scifaro.com/en/abs/copycat2-a-single-model-for-multi-speaker-tts-and-many-to-many-fine-grained-prosody-transfer-2206.13443</loc><lastmod>2022-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/copycat2-a-single-model-for-multi-speaker-tts-and-many-to-many-fine-grained-prosody-transfer-2206.13443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/copycat2-a-single-model-for-multi-speaker-tts-and-many-to-many-fine-grained-prosody-transfer-2206.13443"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-conditioning-methods-using-variable-frame-rate-for-style-robust-speaker-verification-2206.13680</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-conditioning-methods-using-variable-frame-rate-for-style-robust-speaker-verification-2206.13680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-conditioning-methods-using-variable-frame-rate-for-style-robust-speaker-verification-2206.13680"/></url>
<url><loc>https://scifaro.com/en/abs/learning-from-human-perception-to-improve-automatic-speaker-verification-in-style-mismatched-conditions-2206.13684</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-from-human-perception-to-improve-automatic-speaker-verification-in-style-mismatched-conditions-2206.13684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-from-human-perception-to-improve-automatic-speaker-verification-in-style-mismatched-conditions-2206.13684"/></url>
<url><loc>https://scifaro.com/en/abs/interrelate-training-and-searching-a-unified-online-clustering-framework-for-speaker-diarization-2206.13760</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interrelate-training-and-searching-a-unified-online-clustering-framework-for-speaker-diarization-2206.13760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interrelate-training-and-searching-a-unified-online-clustering-framework-for-speaker-diarization-2206.13760"/></url>
<url><loc>https://scifaro.com/en/abs/a-hierarchical-speaker-representation-framework-for-one-shot-singing-voice-conversion-2206.13762</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hierarchical-speaker-representation-framework-for-one-shot-singing-voice-conversion-2206.13762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hierarchical-speaker-representation-framework-for-one-shot-singing-voice-conversion-2206.13762"/></url>
<url><loc>https://scifaro.com/en/abs/algorithms-for-audio-inpainting-based-on-probabilistic-nonnegative-matrix-factorization-2206.13768</loc><lastmod>2023-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/algorithms-for-audio-inpainting-based-on-probabilistic-nonnegative-matrix-factorization-2206.13768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/algorithms-for-audio-inpainting-based-on-probabilistic-nonnegative-matrix-factorization-2206.13768"/></url>
<url><loc>https://scifaro.com/en/abs/two-methods-for-spoofing-aware-speaker-verification-multi-layer-perceptron-score-fusion-model-and-integrated-embedding-projector-2206.13807</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-methods-for-spoofing-aware-speaker-verification-multi-layer-perceptron-score-fusion-model-and-integrated-embedding-projector-2206.13807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-methods-for-spoofing-aware-speaker-verification-multi-layer-perceptron-score-fusion-model-and-integrated-embedding-projector-2206.13807"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-in-multi-speaker-environments-using-temporal-feature-fusion-2206.13808</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-in-multi-speaker-environments-using-temporal-feature-fusion-2206.13808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-in-multi-speaker-environments-using-temporal-feature-fusion-2206.13808"/></url>
<url><loc>https://scifaro.com/en/abs/retrievertts-modeling-decomposed-factors-for-text-based-speech-insertion-2206.13865</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrievertts-modeling-decomposed-factors-for-text-based-speech-insertion-2206.13865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrievertts-modeling-decomposed-factors-for-text-based-speech-insertion-2206.13865"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-variable-and-controllable-duration-modelling-in-tts-2206.14165</loc><lastmod>2022-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-variable-and-controllable-duration-modelling-in-tts-2206.14165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-variable-and-controllable-duration-modelling-in-tts-2206.14165"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-conventional-pitch-detection-algorithms-with-a-neural-network-approach-2206.14357</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-conventional-pitch-detection-algorithms-with-a-neural-network-approach-2206.14357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-conventional-pitch-detection-algorithms-with-a-neural-network-approach-2206.14357"/></url>
<url><loc>https://scifaro.com/en/abs/a-light-weight-full-band-speech-enhancement-model-2206.14524</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-light-weight-full-band-speech-enhancement-model-2206.14524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-light-weight-full-band-speech-enhancement-model-2206.14524"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-prediction-network-architecture-in-rnn-t-for-asr-2206.14618</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-prediction-network-architecture-in-rnn-t-for-asr-2206.14618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-prediction-network-architecture-in-rnn-t-for-asr-2206.14618"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-density-ratio-for-language-model-biasing-of-sequence-to-sequence-asr-systems-2206.14623</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-density-ratio-for-language-model-biasing-of-sequence-to-sequence-asr-systems-2206.14623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-density-ratio-for-language-model-biasing-of-sequence-to-sequence-asr-systems-2206.14623"/></url>
<url><loc>https://scifaro.com/en/abs/ddktor-automatic-diadochokinetic-speech-analysis-2206.14639</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddktor-automatic-diadochokinetic-speech-analysis-2206.14639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddktor-automatic-diadochokinetic-speech-analysis-2206.14639"/></url>
<url><loc>https://scifaro.com/en/abs/simple-and-effective-multi-sentence-tts-with-expressive-and-coherent-prosody-2206.14643</loc><lastmod>2022-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simple-and-effective-multi-sentence-tts-with-expressive-and-coherent-prosody-2206.14643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simple-and-effective-multi-sentence-tts-with-expressive-and-coherent-prosody-2206.14643"/></url>
<url><loc>https://scifaro.com/en/abs/nextformer-a-convnext-augmented-conformer-for-end-to-end-speech-recognition-2206.14747</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nextformer-a-convnext-augmented-conformer-for-end-to-end-speech-recognition-2206.14747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nextformer-a-convnext-augmented-conformer-for-end-to-end-speech-recognition-2206.14747"/></url>
<url><loc>https://scifaro.com/en/abs/iemotts-toward-robust-cross-speaker-emotion-transfer-and-control-for-speech-synthesis-based-on-disentanglement-between-prosody-and-timbre-2206.14866</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iemotts-toward-robust-cross-speaker-emotion-transfer-and-control-for-speech-synthesis-based-on-disentanglement-between-prosody-and-timbre-2206.14866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iemotts-toward-robust-cross-speaker-emotion-transfer-and-control-for-speech-synthesis-based-on-disentanglement-between-prosody-and-timbre-2206.14866"/></url>
<url><loc>https://scifaro.com/en/abs/gld-net-improving-monaural-speech-enhancement-by-learning-global-and-local-dependency-features-with-gld-block-2206.14962</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gld-net-improving-monaural-speech-enhancement-by-learning-global-and-local-dependency-features-with-gld-block-2206.14962"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gld-net-improving-monaural-speech-enhancement-by-learning-global-and-local-dependency-features-with-gld-block-2206.14962"/></url>
<url><loc>https://scifaro.com/en/abs/improving-visual-speech-enhancement-network-by-learning-audio-visual-affinity-with-multi-head-attention-2206.14964</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-visual-speech-enhancement-network-by-learning-audio-visual-affinity-with-multi-head-attention-2206.14964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-visual-speech-enhancement-network-by-learning-audio-visual-affinity-with-multi-head-attention-2206.14964"/></url>
<url><loc>https://scifaro.com/en/abs/tts-by-tts-2-data-selective-augmentation-for-neural-speech-synthesis-using-ranking-support-vector-machine-with-variational-autoencoder-2206.14984</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-by-tts-2-data-selective-augmentation-for-neural-speech-synthesis-using-ranking-support-vector-machine-with-variational-autoencoder-2206.14984"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-by-tts-2-data-selective-augmentation-for-neural-speech-synthesis-using-ranking-support-vector-machine-with-variational-autoencoder-2206.14984"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-room-compensation-using-local-pca-based-room-average-power-response-estimation-2206.15356</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-room-compensation-using-local-pca-based-room-average-power-response-estimation-2206.15356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-room-compensation-using-local-pca-based-room-average-power-response-estimation-2206.15356"/></url>
<url><loc>https://scifaro.com/en/abs/learning-audio-text-agreement-for-open-vocabulary-keyword-spotting-2206.15400</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-audio-text-agreement-for-open-vocabulary-keyword-spotting-2206.15400"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-audio-text-agreement-for-open-vocabulary-keyword-spotting-2206.15400"/></url>
<url><loc>https://scifaro.com/en/abs/sub-8-bit-quantization-aware-training-for-8-bit-neural-network-accelerator-with-on-device-speech-recognition-2206.15408</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-8-bit-quantization-aware-training-for-8-bit-neural-network-accelerator-with-on-device-speech-recognition-2206.15408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-8-bit-quantization-aware-training-for-8-bit-neural-network-accelerator-with-on-device-speech-recognition-2206.15408"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-cross-lingual-tts-using-transferable-phoneme-embedding-2206.15427</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-cross-lingual-tts-using-transferable-phoneme-embedding-2206.15427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-cross-lingual-tts-using-transferable-phoneme-embedding-2206.15427"/></url>
<url><loc>https://scifaro.com/en/abs/challenges-and-opportunities-in-multi-device-speech-processing-2206.15432</loc><lastmod>2022-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/challenges-and-opportunities-in-multi-device-speech-processing-2206.15432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/challenges-and-opportunities-in-multi-device-speech-processing-2206.15432"/></url>
<url><loc>https://scifaro.com/en/abs/sasv-based-on-pre-trained-asv-system-and-integrated-scoring-module-2207.00150</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sasv-based-on-pre-trained-asv-system-and-integrated-scoring-module-2207.00150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sasv-based-on-pre-trained-asv-system-and-integrated-scoring-module-2207.00150"/></url>
<url><loc>https://scifaro.com/en/abs/updating-only-encoders-prevents-catastrophic-forgetting-of-end-to-end-asr-models-2207.00216</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/updating-only-encoders-prevents-catastrophic-forgetting-of-end-to-end-asr-models-2207.00216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/updating-only-encoders-prevents-catastrophic-forgetting-of-end-to-end-asr-models-2207.00216"/></url>
<url><loc>https://scifaro.com/en/abs/learning-subject-invariant-representations-from-speech-evoked-eeg-using-variational-autoencoders-2207.00323</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-subject-invariant-representations-from-speech-evoked-eeg-using-variational-autoencoders-2207.00323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-subject-invariant-representations-from-speech-evoked-eeg-using-variational-autoencoders-2207.00323"/></url>
<url><loc>https://scifaro.com/en/abs/fithubert-going-thinner-and-deeper-for-knowledge-distillation-of-speech-self-supervised-learning-2207.00555</loc><lastmod>2022-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fithubert-going-thinner-and-deeper-for-knowledge-distillation-of-speech-self-supervised-learning-2207.00555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fithubert-going-thinner-and-deeper-for-knowledge-distillation-of-speech-self-supervised-learning-2207.00555"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diarization-and-identification-from-single-channel-classroom-audio-recording-using-virtual-microphones-2207.00660</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diarization-and-identification-from-single-channel-classroom-audio-recording-using-virtual-microphones-2207.00660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diarization-and-identification-from-single-channel-classroom-audio-recording-using-virtual-microphones-2207.00660"/></url>
<url><loc>https://scifaro.com/en/abs/userlibri-a-dataset-for-asr-personalization-using-only-text-2207.00706</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/userlibri-a-dataset-for-asr-personalization-using-only-text-2207.00706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/userlibri-a-dataset-for-asr-personalization-using-only-text-2207.00706"/></url>
<url><loc>https://scifaro.com/en/abs/computer-assisted-pronunciation-training-speech-synthesis-is-almost-all-you-need-2207.00774</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computer-assisted-pronunciation-training-speech-synthesis-is-almost-all-you-need-2207.00774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computer-assisted-pronunciation-training-speech-synthesis-is-almost-all-you-need-2207.00774"/></url>
<url><loc>https://scifaro.com/en/abs/a-graph-isomorphism-network-with-weighted-multiple-aggregators-for-speech-emotion-recognition-2207.00940</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-graph-isomorphism-network-with-weighted-multiple-aggregators-for-speech-emotion-recognition-2207.00940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-graph-isomorphism-network-with-weighted-multiple-aggregators-for-speech-emotion-recognition-2207.00940"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-acoustic-contextual-representation-by-audio-textual-cross-modal-learning-for-conversational-asr-2207.01039</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-acoustic-contextual-representation-by-audio-textual-cross-modal-learning-for-conversational-asr-2207.01039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-acoustic-contextual-representation-by-audio-textual-cross-modal-learning-for-conversational-asr-2207.01039"/></url>
<url><loc>https://scifaro.com/en/abs/dailytalk-spoken-dialogue-dataset-for-conversational-text-to-speech-2207.01063</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dailytalk-spoken-dialogue-dataset-for-conversational-text-to-speech-2207.01063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dailytalk-spoken-dialogue-dataset-for-conversational-text-to-speech-2207.01063"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-functions-of-fxlms-based-multi-channel-multi-tone-active-noise-equalizers-2207.01102</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-functions-of-fxlms-based-multi-channel-multi-tone-active-noise-equalizers-2207.01102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-functions-of-fxlms-based-multi-channel-multi-tone-active-noise-equalizers-2207.01102"/></url>
<url><loc>https://scifaro.com/en/abs/glowvc-mel-spectrogram-space-disentangling-model-for-language-independent-text-free-voice-conversion-2207.01454</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/glowvc-mel-spectrogram-space-disentangling-model-for-language-independent-text-free-voice-conversion-2207.01454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/glowvc-mel-spectrogram-space-disentangling-model-for-language-independent-text-free-voice-conversion-2207.01454"/></url>
<url><loc>https://scifaro.com/en/abs/mix-and-match-an-empirical-study-on-training-corpus-composition-for-polyglot-text-to-speech-tts-2207.01507</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mix-and-match-an-empirical-study-on-training-corpus-composition-for-polyglot-text-to-speech-tts-2207.01507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mix-and-match-an-empirical-study-on-training-corpus-composition-for-polyglot-text-to-speech-tts-2207.01507"/></url>
<url><loc>https://scifaro.com/en/abs/unify-and-conquer-how-phonetic-feature-representation-affects-polyglot-text-to-speech-tts-2207.01547</loc><lastmod>2022-07-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unify-and-conquer-how-phonetic-feature-representation-affects-polyglot-text-to-speech-tts-2207.01547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unify-and-conquer-how-phonetic-feature-representation-affects-polyglot-text-to-speech-tts-2207.01547"/></url>
<url><loc>https://scifaro.com/en/abs/semi-blind-source-separation-using-convolutive-transfer-function-for-nonlinear-acoustic-echo-cancellation-2207.01556</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-blind-source-separation-using-convolutive-transfer-function-for-nonlinear-acoustic-echo-cancellation-2207.01556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-blind-source-separation-using-convolutive-transfer-function-for-nonlinear-acoustic-echo-cancellation-2207.01556"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-multi-task-deep-learning-for-noise-robust-voice-activity-detection-with-low-algorithmic-delay-2207.01691</loc><lastmod>2022-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-multi-task-deep-learning-for-noise-robust-voice-activity-detection-with-low-algorithmic-delay-2207.01691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-multi-task-deep-learning-for-noise-robust-voice-activity-detection-with-low-algorithmic-delay-2207.01691"/></url>
<url><loc>https://scifaro.com/en/abs/deformer-coupling-deformed-localized-patterns-with-global-context-for-robust-end-to-end-speech-recognition-2207.01732</loc><lastmod>2025-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deformer-coupling-deformed-localized-patterns-with-global-context-for-robust-end-to-end-speech-recognition-2207.01732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deformer-coupling-deformed-localized-patterns-with-global-context-for-robust-end-to-end-speech-recognition-2207.01732"/></url>
<url><loc>https://scifaro.com/en/abs/relating-the-fundamental-frequency-of-speech-with-eeg-using-a-dilated-convolutional-network-2207.01963</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relating-the-fundamental-frequency-of-speech-with-eeg-using-a-dilated-convolutional-network-2207.01963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relating-the-fundamental-frequency-of-speech-with-eeg-using-a-dilated-convolutional-network-2207.01963"/></url>
<url><loc>https://scifaro.com/en/abs/improving-streaming-end-to-end-asr-on-transformer-based-causal-models-with-encoder-states-revision-strategies-2207.02495</loc><lastmod>2022-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-streaming-end-to-end-asr-on-transformer-based-causal-models-with-encoder-states-revision-strategies-2207.02495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-streaming-end-to-end-asr-on-transformer-based-causal-models-with-encoder-states-revision-strategies-2207.02495"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-to-punctuated-text-recognition-2207.03169</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-to-punctuated-text-recognition-2207.03169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-to-punctuated-text-recognition-2207.03169"/></url>
<url><loc>https://scifaro.com/en/abs/nesc-robust-neural-end-2-end-speech-coding-with-gans-2207.03282</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nesc-robust-neural-end-2-end-speech-coding-with-gans-2207.03282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nesc-robust-neural-end-2-end-speech-coding-with-gans-2207.03282"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-low-footprint-wake-word-detection-using-knowledge-distillation-2207.03331</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-low-footprint-wake-word-detection-using-knowledge-distillation-2207.03331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-low-footprint-wake-word-detection-using-knowledge-distillation-2207.03331"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-investigating-model-representations-multi-task-learning-and-knowledge-distillation-2207.03334</loc><lastmod>2022-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-investigating-model-representations-multi-task-learning-and-knowledge-distillation-2207.03334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-investigating-model-representations-multi-task-learning-and-knowledge-distillation-2207.03334"/></url>
<url><loc>https://scifaro.com/en/abs/bibletts-a-large-high-fidelity-multilingual-and-uniquely-african-speech-corpus-2207.03546</loc><lastmod>2022-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bibletts-a-large-high-fidelity-multilingual-and-uniquely-african-speech-corpus-2207.03546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bibletts-a-large-high-fidelity-multilingual-and-uniquely-african-speech-corpus-2207.03546"/></url>
<url><loc>https://scifaro.com/en/abs/the-acii-2022-affective-vocal-bursts-workshop-competition-understanding-a-critically-understudied-modality-of-emotional-expression-2207.03572</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-acii-2022-affective-vocal-bursts-workshop-competition-understanding-a-critically-understudied-modality-of-emotional-expression-2207.03572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-acii-2022-affective-vocal-bursts-workshop-competition-understanding-a-critically-understudied-modality-of-emotional-expression-2207.03572"/></url>
<url><loc>https://scifaro.com/en/abs/rhythm-and-form-in-music-a-complex-systems-approach-2207.03602</loc><lastmod>2022-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythm-and-form-in-music-a-complex-systems-approach-2207.03602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythm-and-form-in-music-a-complex-systems-approach-2207.03602"/></url>
<url><loc>https://scifaro.com/en/abs/tandem-multitask-training-of-speaker-diarisation-and-speech-recognition-for-meeting-transcription-2207.03852</loc><lastmod>2022-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tandem-multitask-training-of-speaker-diarisation-and-speech-recognition-for-meeting-transcription-2207.03852"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tandem-multitask-training-of-speaker-diarisation-and-speech-recognition-for-meeting-transcription-2207.03852"/></url>
<url><loc>https://scifaro.com/en/abs/graph-based-multi-view-fusion-and-local-adaptation-mitigating-within-household-confusability-for-speaker-identification-2207.04081</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-based-multi-view-fusion-and-local-adaptation-mitigating-within-household-confusability-for-speaker-identification-2207.04081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-based-multi-view-fusion-and-local-adaptation-mitigating-within-household-confusability-for-speaker-identification-2207.04081"/></url>
<url><loc>https://scifaro.com/en/abs/internal-language-model-estimation-based-language-model-fusion-for-cross-domain-code-switching-speech-recognition-2207.04176</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/internal-language-model-estimation-based-language-model-fusion-for-cross-domain-code-switching-speech-recognition-2207.04176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/internal-language-model-estimation-based-language-model-fusion-for-cross-domain-code-switching-speech-recognition-2207.04176"/></url>
<url><loc>https://scifaro.com/en/abs/intermediate-layer-output-regularization-for-attention-based-speech-recognition-with-shared-decoder-2207.04177</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intermediate-layer-output-regularization-for-attention-based-speech-recognition-with-shared-decoder-2207.04177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intermediate-layer-output-regularization-for-attention-based-speech-recognition-with-shared-decoder-2207.04177"/></url>
<url><loc>https://scifaro.com/en/abs/multi-frequency-information-enhanced-channel-attention-module-for-speaker-representation-learning-2207.04540</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-frequency-information-enhanced-channel-attention-module-for-speaker-representation-learning-2207.04540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-frequency-information-enhanced-channel-attention-module-for-speaker-representation-learning-2207.04540"/></url>
<url><loc>https://scifaro.com/en/abs/pmct-patched-multi-condition-training-for-robust-speech-recognition-2207.04949</loc><lastmod>2022-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pmct-patched-multi-condition-training-for-robust-speech-recognition-2207.04949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pmct-patched-multi-condition-training-for-robust-speech-recognition-2207.04949"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-modeling-from-de-identified-data-2207.05469</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-modeling-from-de-identified-data-2207.05469"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-modeling-from-de-identified-data-2207.05469"/></url>
<url><loc>https://scifaro.com/en/abs/label-efficient-self-supervised-speaker-verification-with-information-maximization-and-contrastive-learning-2207.05506</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-efficient-self-supervised-speaker-verification-with-information-maximization-and-contrastive-learning-2207.05506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-efficient-self-supervised-speaker-verification-with-information-maximization-and-contrastive-learning-2207.05506"/></url>
<url><loc>https://scifaro.com/en/abs/statistics-of-the-interaural-parameters-for-dichotic-tones-in-diotic-noise-n-0-s-psi-2207.05541</loc><lastmod>2022-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistics-of-the-interaural-parameters-for-dichotic-tones-in-diotic-noise-n-0-s-psi-2207.05541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistics-of-the-interaural-parameters-for-dichotic-tones-in-diotic-noise-n-0-s-psi-2207.05541"/></url>
<url><loc>https://scifaro.com/en/abs/poetictts-controllable-poetry-reading-for-literary-studies-2207.05549</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/poetictts-controllable-poetry-reading-for-literary-studies-2207.05549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/poetictts-controllable-poetry-reading-for-literary-studies-2207.05549"/></url>
<url><loc>https://scifaro.com/en/abs/a-cyclical-approach-to-synthetic-and-natural-speech-mismatch-refinement-of-neural-post-filter-for-low-cost-text-to-speech-system-2207.05913</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cyclical-approach-to-synthetic-and-natural-speech-mismatch-refinement-of-neural-post-filter-for-low-cost-text-to-speech-system-2207.05913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cyclical-approach-to-synthetic-and-natural-speech-mismatch-refinement-of-neural-post-filter-for-low-cost-text-to-speech-system-2207.05913"/></url>
<url><loc>https://scifaro.com/en/abs/online-target-speaker-voice-activity-detection-for-speaker-diarization-2207.05920</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-target-speaker-voice-activity-detection-for-speaker-diarization-2207.05920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-target-speaker-voice-activity-detection-for-speaker-diarization-2207.05920"/></url>
<url><loc>https://scifaro.com/en/abs/cross-age-speaker-verification-learning-age-invariant-speaker-embeddings-2207.05929</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-age-speaker-verification-learning-age-invariant-speaker-embeddings-2207.05929"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-age-speaker-verification-learning-age-invariant-speaker-embeddings-2207.05929"/></url>
<url><loc>https://scifaro.com/en/abs/satts-speaker-attractor-text-to-speech-learning-to-speak-by-learning-to-separate-2207.06011</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/satts-speaker-attractor-text-to-speech-learning-to-speak-by-learning-to-separate-2207.06011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/satts-speaker-attractor-text-to-speech-learning-to-speak-by-learning-to-separate-2207.06011"/></url>
<url><loc>https://scifaro.com/en/abs/mm-alt-a-multimodal-automatic-lyric-transcription-system-2207.06127</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mm-alt-a-multimodal-automatic-lyric-transcription-system-2207.06127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mm-alt-a-multimodal-automatic-lyric-transcription-system-2207.06127"/></url>
<url><loc>https://scifaro.com/en/abs/prodiff-progressive-fast-diffusion-model-for-high-quality-text-to-speech-2207.06389</loc><lastmod>2022-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prodiff-progressive-fast-diffusion-model-for-high-quality-text-to-speech-2207.06389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prodiff-progressive-fast-diffusion-model-for-high-quality-text-to-speech-2207.06389"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-network-framework-for-the-design-of-individualised-hearing-loss-compensation-2207.07091</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-network-framework-for-the-design-of-individualised-hearing-loss-compensation-2207.07091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-network-framework-for-the-design-of-individualised-hearing-loss-compensation-2207.07091"/></url>
<url><loc>https://scifaro.com/en/abs/direction-aware-joint-adaptation-of-neural-speech-enhancement-and-recognition-in-real-multiparty-conversational-environments-2207.07273</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-aware-joint-adaptation-of-neural-speech-enhancement-and-recognition-in-real-multiparty-conversational-environments-2207.07273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-aware-joint-adaptation-of-neural-speech-enhancement-and-recognition-in-real-multiparty-conversational-environments-2207.07273"/></url>
<url><loc>https://scifaro.com/en/abs/direction-aware-adaptive-online-neural-speech-enhancement-with-an-augmented-reality-headset-in-real-noisy-conversational-environments-2207.07296</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-aware-adaptive-online-neural-speech-enhancement-with-an-augmented-reality-headset-in-real-noisy-conversational-environments-2207.07296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-aware-adaptive-online-neural-speech-enhancement-with-an-augmented-reality-headset-in-real-noisy-conversational-environments-2207.07296"/></url>
<url><loc>https://scifaro.com/en/abs/mimo-doanet-multi-channel-input-and-multiple-outputs-doa-network-with-unknown-number-of-sound-sources-2207.07307</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimo-doanet-multi-channel-input-and-multiple-outputs-doa-network-with-unknown-number-of-sound-sources-2207.07307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimo-doanet-multi-channel-input-and-multiple-outputs-doa-network-with-unknown-number-of-sound-sources-2207.07307"/></url>
<url><loc>https://scifaro.com/en/abs/polyscriber-integrated-fine-tuning-of-extractor-and-lyrics-transcriber-for-polyphonic-music-2207.07336</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polyscriber-integrated-fine-tuning-of-extractor-and-lyrics-transcriber-for-polyphonic-music-2207.07336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polyscriber-integrated-fine-tuning-of-extractor-and-lyrics-transcriber-for-polyphonic-music-2207.07336"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-oppo-system-for-the-2022-spoofing-aware-speaker-verification-challenge-2207.07510</loc><lastmod>2022-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-oppo-system-for-the-2022-spoofing-aware-speaker-verification-challenge-2207.07510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-oppo-system-for-the-2022-spoofing-aware-speaker-verification-challenge-2207.07510"/></url>
<url><loc>https://scifaro.com/en/abs/segment-level-metric-learning-for-few-shot-bioacoustic-event-detection-2207.07773</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segment-level-metric-learning-for-few-shot-bioacoustic-event-detection-2207.07773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segment-level-metric-learning-for-few-shot-bioacoustic-event-detection-2207.07773"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-reweighting-for-speaker-verification-fairness-2207.07776</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-reweighting-for-speaker-verification-fairness-2207.07776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-reweighting-for-speaker-verification-fairness-2207.07776"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-geographic-disparities-in-automatic-speech-recognition-via-elastic-weight-consolidation-2207.07850</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-geographic-disparities-in-automatic-speech-recognition-via-elastic-weight-consolidation-2207.07850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-geographic-disparities-in-automatic-speech-recognition-via-elastic-weight-consolidation-2207.07850"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-target-speech-enhancement-based-on-erb-scaled-spatial-coherence-features-2207.08126</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-target-speech-enhancement-based-on-erb-scaled-spatial-coherence-features-2207.08126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-target-speech-enhancement-based-on-erb-scaled-spatial-coherence-features-2207.08126"/></url>
<url><loc>https://scifaro.com/en/abs/improving-spatial-cues-for-hearables-using-a-parameterized-binaural-cdr-estimator-2207.08314</loc><lastmod>2022-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-spatial-cues-for-hearables-using-a-parameterized-binaural-cdr-estimator-2207.08314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-spatial-cues-for-hearables-using-a-parameterized-binaural-cdr-estimator-2207.08314"/></url>
<url><loc>https://scifaro.com/en/abs/gafx-a-general-audio-feature-extractor-2207.09145</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gafx-a-general-audio-feature-extractor-2207.09145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gafx-a-general-audio-feature-extractor-2207.09145"/></url>
<url><loc>https://scifaro.com/en/abs/do-uhear-validation-of-uhear-app-for-preliminary-screening-of-hearing-ability-in-soundscape-studies-2207.09221</loc><lastmod>2022-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-uhear-validation-of-uhear-app-for-preliminary-screening-of-hearing-ability-in-soundscape-studies-2207.09221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-uhear-validation-of-uhear-app-for-preliminary-screening-of-hearing-ability-in-soundscape-studies-2207.09221"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-se-speech-enhancement-for-robust-speech-recognition-translation-and-understanding-2207.09514</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-se-speech-enhancement-for-robust-speech-recognition-translation-and-understanding-2207.09514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-se-speech-enhancement-for-robust-speech-recognition-translation-and-understanding-2207.09514"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-auxiliary-text-query-modifier-to-content-based-audio-retrieval-2207.09732</loc><lastmod>2022-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-auxiliary-text-query-modifier-to-content-based-audio-retrieval-2207.09732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-auxiliary-text-query-modifier-to-content-based-audio-retrieval-2207.09732"/></url>
<url><loc>https://scifaro.com/en/abs/direct-and-residual-subspace-decomposition-of-spatial-room-impulse-responses-2207.09733</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direct-and-residual-subspace-decomposition-of-spatial-room-impulse-responses-2207.09733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direct-and-residual-subspace-decomposition-of-spatial-room-impulse-responses-2207.09733"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-of-wav2vec-2-0-for-automatic-lyric-transcription-2207.09747</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-of-wav2vec-2-0-for-automatic-lyric-transcription-2207.09747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-of-wav2vec-2-0-for-automatic-lyric-transcription-2207.09747"/></url>
<url><loc>https://scifaro.com/en/abs/jointly-predicting-emotion-age-and-country-using-pre-trained-acoustic-embedding-2207.10333</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jointly-predicting-emotion-age-and-country-using-pre-trained-acoustic-embedding-2207.10333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jointly-predicting-emotion-age-and-country-using-pre-trained-acoustic-embedding-2207.10333"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-free-low-latency-adaptive-speech-enhancement-based-on-frame-online-beamforming-powered-by-block-online-fastmnmf-2207.10934</loc><lastmod>2022-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-free-low-latency-adaptive-speech-enhancement-based-on-frame-online-beamforming-powered-by-block-online-fastmnmf-2207.10934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-free-low-latency-adaptive-speech-enhancement-based-on-frame-online-beamforming-powered-by-block-online-fastmnmf-2207.10934"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-echo-cancellation-with-neural-kalman-filtering-2207.11388</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-echo-cancellation-with-neural-kalman-filtering-2207.11388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-echo-cancellation-with-neural-kalman-filtering-2207.11388"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2207.11529</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2207.11529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2207.11529"/></url>
<url><loc>https://scifaro.com/en/abs/learning-a-dual-mode-speech-recognition-model-via-self-pruning-2207.11906</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-a-dual-mode-speech-recognition-model-via-self-pruning-2207.11906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-a-dual-mode-speech-recognition-model-via-self-pruning-2207.11906"/></url>
<url><loc>https://scifaro.com/en/abs/conceptbeam-concept-driven-target-speech-extraction-2207.11964</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conceptbeam-concept-driven-target-speech-extraction-2207.11964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conceptbeam-concept-driven-target-speech-extraction-2207.11964"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-data-selection-for-speech-recognition-with-contrastive-loss-ratios-2207.12028</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-data-selection-for-speech-recognition-with-contrastive-loss-ratios-2207.12028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-data-selection-for-speech-recognition-with-contrastive-loss-ratios-2207.12028"/></url>
<url><loc>https://scifaro.com/en/abs/a-polyphone-bert-for-polyphone-disambiguation-in-mandarin-chinese-2207.12089</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-polyphone-bert-for-polyphone-disambiguation-in-mandarin-chinese-2207.12089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-polyphone-bert-for-polyphone-disambiguation-in-mandarin-chinese-2207.12089"/></url>
<url><loc>https://scifaro.com/en/abs/label-uncertainty-modeling-and-prediction-for-speech-emotion-recognition-using-t-distributions-2207.12135</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-uncertainty-modeling-and-prediction-for-speech-emotion-recognition-using-t-distributions-2207.12135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-uncertainty-modeling-and-prediction-for-speech-emotion-recognition-using-t-distributions-2207.12135"/></url>
<url><loc>https://scifaro.com/en/abs/transplantation-of-conversational-speaking-style-with-interjections-in-sequence-to-sequence-speech-synthesis-2207.12262</loc><lastmod>2022-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transplantation-of-conversational-speaking-style-with-interjections-in-sequence-to-sequence-speech-synthesis-2207.12262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transplantation-of-conversational-speaking-style-with-interjections-in-sequence-to-sequence-speech-synthesis-2207.12262"/></url>
<url><loc>https://scifaro.com/en/abs/implementation-of-tiny-machine-learning-models-on-arduino-33-ble-for-gesture-and-speech-recognition-2207.12866</loc><lastmod>2022-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementation-of-tiny-machine-learning-models-on-arduino-33-ble-for-gesture-and-speech-recognition-2207.12866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementation-of-tiny-machine-learning-models-on-arduino-33-ble-for-gesture-and-speech-recognition-2207.12866"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-speech-emotion-recognition-using-cross-attention-with-aligned-audio-and-text-2207.12895</loc><lastmod>2022-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-speech-emotion-recognition-using-cross-attention-with-aligned-audio-and-text-2207.12895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-speech-emotion-recognition-using-cross-attention-with-aligned-audio-and-text-2207.12895"/></url>
<url><loc>https://scifaro.com/en/abs/assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2207.12899</loc><lastmod>2023-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2207.12899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessment-of-a-cost-effective-headphone-calibration-procedure-for-soundscape-evaluations-2207.12899"/></url>
<url><loc>https://scifaro.com/en/abs/subword-dictionary-learning-and-segmentation-techniques-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13331</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subword-dictionary-learning-and-segmentation-techniques-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subword-dictionary-learning-and-segmentation-techniques-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13331"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-driven-subword-grammar-modeling-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13333</loc><lastmod>2022-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-driven-subword-grammar-modeling-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-driven-subword-grammar-modeling-for-automatic-speech-recognition-in-tamil-and-kannada-2207.13333"/></url>
<url><loc>https://scifaro.com/en/abs/utterance-by-utterance-overlap-aware-neural-diarization-with-graph-pit-2207.13888</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utterance-by-utterance-overlap-aware-neural-diarization-with-graph-pit-2207.13888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utterance-by-utterance-overlap-aware-neural-diarization-with-graph-pit-2207.13888"/></url>
<url><loc>https://scifaro.com/en/abs/a-unifying-view-on-blind-source-separation-of-convolutive-mixtures-based-on-independent-component-analysis-2207.13934</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unifying-view-on-blind-source-separation-of-convolutive-mixtures-based-on-independent-component-analysis-2207.13934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unifying-view-on-blind-source-separation-of-convolutive-mixtures-based-on-independent-component-analysis-2207.13934"/></url>
<url><loc>https://scifaro.com/en/abs/extending-rnn-t-based-speech-recognition-systems-with-emotion-and-language-classification-2207.13965</loc><lastmod>2022-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-rnn-t-based-speech-recognition-systems-with-emotion-and-language-classification-2207.13965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-rnn-t-based-speech-recognition-systems-with-emotion-and-language-classification-2207.13965"/></url>
<url><loc>https://scifaro.com/en/abs/dialogue-enhancement-and-listening-effort-in-broadcast-audio-a-multimodal-evaluation-2207.14240</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialogue-enhancement-and-listening-effort-in-broadcast-audio-a-multimodal-evaluation-2207.14240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialogue-enhancement-and-listening-effort-in-broadcast-audio-a-multimodal-evaluation-2207.14240"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-global-hrtfs-from-scanned-head-geometry-using-deep-learning-and-compact-representations-2207.14352</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-global-hrtfs-from-scanned-head-geometry-using-deep-learning-and-compact-representations-2207.14352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-global-hrtfs-from-scanned-head-geometry-using-deep-learning-and-compact-representations-2207.14352"/></url>
<url><loc>https://scifaro.com/en/abs/low-data-no-problem-low-resource-language-agnostic-conversational-text-to-speech-via-f0-conditioned-data-augmentation-2207.14607</loc><lastmod>2022-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-data-no-problem-low-resource-language-agnostic-conversational-text-to-speech-via-f0-conditioned-data-augmentation-2207.14607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-data-no-problem-low-resource-language-agnostic-conversational-text-to-speech-via-f0-conditioned-data-augmentation-2207.14607"/></url>
<url><loc>https://scifaro.com/en/abs/dent-ddsp-data-efficient-noisy-speech-generator-using-differentiable-digital-signal-processors-for-explicit-distortion-modelling-and-noise-robust-speech-recognition-2208.00987</loc><lastmod>2022-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dent-ddsp-data-efficient-noisy-speech-generator-using-differentiable-digital-signal-processors-for-explicit-distortion-modelling-and-noise-robust-speech-recognition-2208.00987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dent-ddsp-data-efficient-noisy-speech-generator-using-differentiable-digital-signal-processors-for-explicit-distortion-modelling-and-noise-robust-speech-recognition-2208.00987"/></url>
<url><loc>https://scifaro.com/en/abs/voice-analysis-for-stress-detection-and-application-in-virtual-reality-to-improve-public-speaking-in-real-time-a-review-2208.01041</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-analysis-for-stress-detection-and-application-in-virtual-reality-to-improve-public-speaking-in-real-time-a-review-2208.01041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-analysis-for-stress-detection-and-application-in-virtual-reality-to-improve-public-speaking-in-real-time-a-review-2208.01041"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2208.01555</loc><lastmod>2022-08-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2208.01555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-cnns-for-acoustic-scene-classification-2208.01555"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-of-modeling-rising-intonation-in-cantonese-neural-speech-synthesis-2208.02189</loc><lastmod>2022-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-of-modeling-rising-intonation-in-cantonese-neural-speech-synthesis-2208.02189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-of-modeling-rising-intonation-in-cantonese-neural-speech-synthesis-2208.02189"/></url>
<url><loc>https://scifaro.com/en/abs/domestic-activity-clustering-from-audio-via-depthwise-separable-convolutional-autoencoder-network-2208.02406</loc><lastmod>2022-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domestic-activity-clustering-from-audio-via-depthwise-separable-convolutional-autoencoder-network-2208.02406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domestic-activity-clustering-from-audio-via-depthwise-separable-convolutional-autoencoder-network-2208.02406"/></url>
<url><loc>https://scifaro.com/en/abs/attention-and-dct-based-global-context-modeling-for-text-independent-speaker-recognition-2208.02778</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-and-dct-based-global-context-modeling-for-text-independent-speaker-recognition-2208.02778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-and-dct-based-global-context-modeling-for-text-independent-speaker-recognition-2208.02778"/></url>
<url><loc>https://scifaro.com/en/abs/aid-open-source-anechoic-interferer-dataset-2208.03023</loc><lastmod>2022-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aid-open-source-anechoic-interferer-dataset-2208.03023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aid-open-source-anechoic-interferer-dataset-2208.03023"/></url>
<url><loc>https://scifaro.com/en/abs/ssdpt-self-supervised-dual-path-transformer-for-anomalous-sound-detection-in-machine-condition-monitoring-2208.03421</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssdpt-self-supervised-dual-path-transformer-for-anomalous-sound-detection-in-machine-condition-monitoring-2208.03421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssdpt-self-supervised-dual-path-transformer-for-anomalous-sound-detection-in-machine-condition-monitoring-2208.03421"/></url>
<url><loc>https://scifaro.com/en/abs/fra-rir-fast-random-approximation-of-the-image-source-method-2208.04101</loc><lastmod>2022-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fra-rir-fast-random-approximation-of-the-image-source-method-2208.04101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fra-rir-fast-random-approximation-of-the-image-source-method-2208.04101"/></url>
<url><loc>https://scifaro.com/en/abs/an-anchor-free-detector-for-continuous-speech-keyword-spotting-2208.04622</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-anchor-free-detector-for-continuous-speech-keyword-spotting-2208.04622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-anchor-free-detector-for-continuous-speech-keyword-spotting-2208.04622"/></url>
<url><loc>https://scifaro.com/en/abs/recycling-an-anechoic-pre-trained-speech-separation-deep-neural-network-for-binaural-dereverberation-of-a-single-source-2208.04626</loc><lastmod>2022-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recycling-an-anechoic-pre-trained-speech-separation-deep-neural-network-for-binaural-dereverberation-of-a-single-source-2208.04626"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recycling-an-anechoic-pre-trained-speech-separation-deep-neural-network-for-binaural-dereverberation-of-a-single-source-2208.04626"/></url>
<url><loc>https://scifaro.com/en/abs/extending-gcc-phat-using-shift-equivariant-neural-networks-2208.04654</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-gcc-phat-using-shift-equivariant-neural-networks-2208.04654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-gcc-phat-using-shift-equivariant-neural-networks-2208.04654"/></url>
<url><loc>https://scifaro.com/en/abs/improving-hypernasality-estimation-with-automatic-speech-recognition-in-cleft-palate-speech-2208.05122</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-hypernasality-estimation-with-automatic-speech-recognition-in-cleft-palate-speech-2208.05122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-hypernasality-estimation-with-automatic-speech-recognition-in-cleft-palate-speech-2208.05122"/></url>
<url><loc>https://scifaro.com/en/abs/preserving-the-beamforming-effect-for-spatial-cue-based-pseudo-binaural-dereverberation-of-a-single-source-2208.05184</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preserving-the-beamforming-effect-for-spatial-cue-based-pseudo-binaural-dereverberation-of-a-single-source-2208.05184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preserving-the-beamforming-effect-for-spatial-cue-based-pseudo-binaural-dereverberation-of-a-single-source-2208.05184"/></url>
<url><loc>https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-of-utterance-level-speech-representations-2208.05413</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-of-utterance-level-speech-representations-2208.05413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-of-utterance-level-speech-representations-2208.05413"/></url>
<url><loc>https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-for-utterance-level-information-extraction-from-speech-2208.05445</loc><lastmod>2022-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-for-utterance-level-information-extraction-from-speech-2208.05445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-contrastive-self-supervised-learning-for-utterance-level-information-extraction-from-speech-2208.05445"/></url>
<url><loc>https://scifaro.com/en/abs/chewing-detection-from-commercial-smart-glasses-2208.05735</loc><lastmod>2022-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chewing-detection-from-commercial-smart-glasses-2208.05735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chewing-detection-from-commercial-smart-glasses-2208.05735"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-and-analysis-of-new-curriculum-criteria-for-end-to-end-asr-2208.05782</loc><lastmod>2022-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-and-analysis-of-new-curriculum-criteria-for-end-to-end-asr-2208.05782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-and-analysis-of-new-curriculum-criteria-for-end-to-end-asr-2208.05782"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-and-dereverberation-with-diffusion-based-generative-models-2208.05830</loc><lastmod>2025-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-and-dereverberation-with-diffusion-based-generative-models-2208.05830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-and-dereverberation-with-diffusion-based-generative-models-2208.05830"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-world-synthesizer-based-neural-vocoder-with-application-to-end-to-end-audio-style-transfer-2208.07282</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-world-synthesizer-based-neural-vocoder-with-application-to-end-to-end-audio-style-transfer-2208.07282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-world-synthesizer-based-neural-vocoder-with-application-to-end-to-end-audio-style-transfer-2208.07282"/></url>
<url><loc>https://scifaro.com/en/abs/c3-dino-joint-contrastive-and-non-contrastive-self-supervised-learning-for-speaker-verification-2208.07446</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/c3-dino-joint-contrastive-and-non-contrastive-self-supervised-learning-for-speaker-verification-2208.07446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/c3-dino-joint-contrastive-and-non-contrastive-self-supervised-learning-for-speaker-verification-2208.07446"/></url>
<url><loc>https://scifaro.com/en/abs/uconv-conformer-high-reduction-of-input-sequence-length-for-end-to-end-speech-recognition-2208.07657</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uconv-conformer-high-reduction-of-input-sequence-length-for-end-to-end-speech-recognition-2208.07657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uconv-conformer-high-reduction-of-input-sequence-length-for-end-to-end-speech-recognition-2208.07657"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-speaker-representation-learning-via-mutual-information-minimization-2208.08012</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-speaker-representation-learning-via-mutual-information-minimization-2208.08012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-speaker-representation-learning-via-mutual-information-minimization-2208.08012"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representation-disentanglement-with-adversarial-mutual-information-learning-for-one-shot-voice-conversion-2208.08757</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representation-disentanglement-with-adversarial-mutual-information-learning-for-one-shot-voice-conversion-2208.08757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representation-disentanglement-with-adversarial-mutual-information-learning-for-one-shot-voice-conversion-2208.08757"/></url>
<url><loc>https://scifaro.com/en/abs/visualising-model-training-via-vowel-space-for-text-to-speech-systems-2208.09775</loc><lastmod>2022-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visualising-model-training-via-vowel-space-for-text-to-speech-systems-2208.09775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visualising-model-training-via-vowel-space-for-text-to-speech-systems-2208.09775"/></url>
<url><loc>https://scifaro.com/en/abs/spatially-selective-active-noise-control-systems-2208.09997</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatially-selective-active-noise-control-systems-2208.09997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatially-selective-active-noise-control-systems-2208.09997"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-sensitivity-of-deep-feature-based-evaluation-metrics-for-audio-textures-2208.10743</loc><lastmod>2022-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-sensitivity-of-deep-feature-based-evaluation-metrics-for-audio-textures-2208.10743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-sensitivity-of-deep-feature-based-evaluation-metrics-for-audio-textures-2208.10743"/></url>
<url><loc>https://scifaro.com/en/abs/a-convolutional-plane-wave-model-for-sound-field-reconstruction-2208.11324</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-convolutional-plane-wave-model-for-sound-field-reconstruction-2208.11324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-convolutional-plane-wave-model-for-sound-field-reconstruction-2208.11324"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-music-mixing-with-deep-learning-and-out-of-domain-data-2208.11428</loc><lastmod>2022-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-music-mixing-with-deep-learning-and-out-of-domain-data-2208.11428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-music-mixing-with-deep-learning-and-out-of-domain-data-2208.11428"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-speech-perception-from-non-invasive-brain-recordings-2208.12266</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-speech-perception-from-non-invasive-brain-recordings-2208.12266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-speech-perception-from-non-invasive-brain-recordings-2208.12266"/></url>
<url><loc>https://scifaro.com/en/abs/mulan-a-joint-embedding-of-music-audio-and-natural-language-2208.12415</loc><lastmod>2022-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mulan-a-joint-embedding-of-music-audio-and-natural-language-2208.12415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mulan-a-joint-embedding-of-music-audio-and-natural-language-2208.12415"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-using-supervised-deep-recurrent-system-for-mental-health-monitoring-2208.12812</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-supervised-deep-recurrent-system-for-mental-health-monitoring-2208.12812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-using-supervised-deep-recurrent-system-for-mental-health-monitoring-2208.12812"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-transformers-and-its-integration-with-end-to-end-neural-diarization-2208.13085</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-transformers-and-its-integration-with-end-to-end-neural-diarization-2208.13085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-with-transformers-and-its-integration-with-end-to-end-neural-diarization-2208.13085"/></url>
<url><loc>https://scifaro.com/en/abs/a-language-agnostic-multilingual-streaming-on-device-asr-system-2208.13916</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-language-agnostic-multilingual-streaming-on-device-asr-system-2208.13916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-language-agnostic-multilingual-streaming-on-device-asr-system-2208.13916"/></url>
<url><loc>https://scifaro.com/en/abs/classify-respiratory-abnormality-in-lung-sounds-using-stft-and-a-fine-tuned-resnet18-network-2208.13943</loc><lastmod>2022-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classify-respiratory-abnormality-in-lung-sounds-using-stft-and-a-fine-tuned-resnet18-network-2208.13943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classify-respiratory-abnormality-in-lung-sounds-using-stft-and-a-fine-tuned-resnet18-network-2208.13943"/></url>
<url><loc>https://scifaro.com/en/abs/singing-beat-tracking-with-self-supervised-front-end-and-linear-transformers-2208.14578</loc><lastmod>2022-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-beat-tracking-with-self-supervised-front-end-and-linear-transformers-2208.14578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-beat-tracking-with-self-supervised-front-end-and-linear-transformers-2208.14578"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-aware-attention-based-asv-back-end-with-multiple-enrollment-utterances-and-a-sampling-strategy-for-the-sasv-challenge-2022-2209.00423</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-aware-attention-based-asv-back-end-with-multiple-enrollment-utterances-and-a-sampling-strategy-for-the-sasv-challenge-2022-2209.00423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-aware-attention-based-asv-back-end-with-multiple-enrollment-utterances-and-a-sampling-strategy-for-the-sasv-challenge-2022-2209.00423"/></url>
<url><loc>https://scifaro.com/en/abs/joint-speaker-encoder-and-neural-back-end-model-for-fully-end-to-end-automatic-speaker-verification-with-multiple-enrollment-utterances-2209.00485</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-speaker-encoder-and-neural-back-end-model-for-fully-end-to-end-automatic-speaker-verification-with-multiple-enrollment-utterances-2209.00485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-speaker-encoder-and-neural-back-end-model-for-fully-end-to-end-automatic-speaker-verification-with-multiple-enrollment-utterances-2209.00485"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-potential-of-jointly-optimised-solutions-to-spoofing-attack-detection-and-automatic-speaker-verification-2209.00506</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-potential-of-jointly-optimised-solutions-to-spoofing-attack-detection-and-automatic-speaker-verification-2209.00506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-potential-of-jointly-optimised-solutions-to-spoofing-attack-detection-and-automatic-speaker-verification-2209.00506"/></url>
<url><loc>https://scifaro.com/en/abs/dialogic-non-invasive-speaker-focused-data-acquisition-for-team-behavior-modeling-2209.00619</loc><lastmod>2022-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialogic-non-invasive-speaker-focused-data-acquisition-for-team-behavior-modeling-2209.00619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialogic-non-invasive-speaker-focused-data-acquisition-for-team-behavior-modeling-2209.00619"/></url>
<url><loc>https://scifaro.com/en/abs/a-wavelet-transform-based-scheme-to-extract-speech-pitch-and-formant-frequencies-2209.00733</loc><lastmod>2022-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-wavelet-transform-based-scheme-to-extract-speech-pitch-and-formant-frequencies-2209.00733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-wavelet-transform-based-scheme-to-extract-speech-pitch-and-formant-frequencies-2209.00733"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-temporal-frequency-attention-for-music-source-separation-2209.00805</loc><lastmod>2022-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-temporal-frequency-attention-for-music-source-separation-2209.00805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-temporal-frequency-attention-for-music-source-separation-2209.00805"/></url>
<url><loc>https://scifaro.com/en/abs/tb-or-not-tb-acoustic-cough-analysis-for-tuberculosis-classification-2209.00934</loc><lastmod>2022-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tb-or-not-tb-acoustic-cough-analysis-for-tuberculosis-classification-2209.00934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tb-or-not-tb-acoustic-cough-analysis-for-tuberculosis-classification-2209.00934"/></url>
<url><loc>https://scifaro.com/en/abs/inverse-free-online-independent-vector-analysis-with-flexible-iterative-source-steering-2209.00937</loc><lastmod>2022-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inverse-free-online-independent-vector-analysis-with-flexible-iterative-source-steering-2209.00937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inverse-free-online-independent-vector-analysis-with-flexible-iterative-source-steering-2209.00937"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speech-super-resolution-with-gan-based-modeling-for-telephony-speaker-verification-2209.01702</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speech-super-resolution-with-gan-based-modeling-for-telephony-speaker-verification-2209.01702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speech-super-resolution-with-gan-based-modeling-for-telephony-speaker-verification-2209.01702"/></url>
<url><loc>https://scifaro.com/en/abs/movement-detection-of-tongue-and-related-body-parts-using-ir-uwb-radar-2209.01762</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/movement-detection-of-tongue-and-related-body-parts-using-ir-uwb-radar-2209.01762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/movement-detection-of-tongue-and-related-body-parts-using-ir-uwb-radar-2209.01762"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-localization-and-detection-for-real-spatial-sound-scenes-event-independent-network-and-data-augmentation-chains-2209.01802</loc><lastmod>2022-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-for-real-spatial-sound-scenes-event-independent-network-and-data-augmentation-chains-2209.01802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-localization-and-detection-for-real-spatial-sound-scenes-event-independent-network-and-data-augmentation-chains-2209.01802"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-into-target-speaking-rate-adaptation-for-voice-conversion-2209.01978</loc><lastmod>2022-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-into-target-speaking-rate-adaptation-for-voice-conversion-2209.01978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-into-target-speaking-rate-adaptation-for-voice-conversion-2209.01978"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-target-speaker-asr-with-neural-transducer-2209.04175</loc><lastmod>2022-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-target-speaker-asr-with-neural-transducer-2209.04175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-target-speaker-asr-with-neural-transducer-2209.04175"/></url>
<url><loc>https://scifaro.com/en/abs/reconstructing-the-dynamic-directivity-of-unconstrained-speech-2209.04473</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstructing-the-dynamic-directivity-of-unconstrained-speech-2209.04473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstructing-the-dynamic-directivity-of-unconstrained-speech-2209.04473"/></url>
<url><loc>https://scifaro.com/en/abs/vararray-meets-t-sot-advancing-the-state-of-the-art-of-streaming-distant-conversational-speech-recognition-2209.04974</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vararray-meets-t-sot-advancing-the-state-of-the-art-of-streaming-distant-conversational-speech-recognition-2209.04974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vararray-meets-t-sot-advancing-the-state-of-the-art-of-streaming-distant-conversational-speech-recognition-2209.04974"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-head-related-transfer-function-representation-based-on-hyperspherical-harmonics-2209.05110</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-head-related-transfer-function-representation-based-on-hyperspherical-harmonics-2209.05110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-head-related-transfer-function-representation-based-on-hyperspherical-harmonics-2209.05110"/></url>
<url><loc>https://scifaro.com/en/abs/how-much-does-prosody-help-turn-taking-investigations-using-voice-activity-projection-models-2209.05161</loc><lastmod>2022-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-much-does-prosody-help-turn-taking-investigations-using-voice-activity-projection-models-2209.05161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-much-does-prosody-help-turn-taking-investigations-using-voice-activity-projection-models-2209.05161"/></url>
<url><loc>https://scifaro.com/en/abs/the-2022-far-field-speaker-verification-challenge-exploring-domain-mismatch-and-semi-supervised-learning-under-the-far-field-scenario-2209.05273</loc><lastmod>2022-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-2022-far-field-speaker-verification-challenge-exploring-domain-mismatch-and-semi-supervised-learning-under-the-far-field-scenario-2209.05273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-2022-far-field-speaker-verification-challenge-exploring-domain-mismatch-and-semi-supervised-learning-under-the-far-field-scenario-2209.05273"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-dependent-structure-for-utterances-in-asr-evaluation-2209.05281</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-dependent-structure-for-utterances-in-asr-evaluation-2209.05281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-dependent-structure-for-utterances-in-asr-evaluation-2209.05281"/></url>
<url><loc>https://scifaro.com/en/abs/learning-asr-pathways-a-sparse-multilingual-asr-model-2209.05735</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-asr-pathways-a-sparse-multilingual-asr-model-2209.05735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-asr-pathways-a-sparse-multilingual-asr-model-2209.05735"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-end-to-end-multilingual-speech-recognition-with-joint-language-identification-2209.06058</loc><lastmod>2022-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-end-to-end-multilingual-speech-recognition-with-joint-language-identification-2209.06058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-end-to-end-multilingual-speech-recognition-with-joint-language-identification-2209.06058"/></url>
<url><loc>https://scifaro.com/en/abs/automated-detection-of-pronunciation-errors-in-non-native-english-speech-employing-deep-learning-2209.06265</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-detection-of-pronunciation-errors-in-non-native-english-speech-employing-deep-learning-2209.06265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-detection-of-pronunciation-errors-in-non-native-english-speech-employing-deep-learning-2209.06265"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-synthesis-from-articulatory-representations-2209.06337</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-articulatory-representations-2209.06337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-articulatory-representations-2209.06337"/></url>
<url><loc>https://scifaro.com/en/abs/a-universally-deployable-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-voice-separation-2209.06410</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-universally-deployable-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-voice-separation-2209.06410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-universally-deployable-asr-frontend-for-joint-acoustic-echo-cancellation-speech-enhancement-and-voice-separation-2209.06410"/></url>
<url><loc>https://scifaro.com/en/abs/applying-wav2vec2-for-speech-recognition-on-bengali-common-voices-dataset-2209.06581</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-wav2vec2-for-speech-recognition-on-bengali-common-voices-dataset-2209.06581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-wav2vec2-for-speech-recognition-on-bengali-common-voices-dataset-2209.06581"/></url>
<url><loc>https://scifaro.com/en/abs/decoupled-pronunciation-and-prosody-modeling-in-meta-learning-based-multilingual-speech-synthesis-2209.06789</loc><lastmod>2022-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupled-pronunciation-and-prosody-modeling-in-meta-learning-based-multilingual-speech-synthesis-2209.06789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupled-pronunciation-and-prosody-modeling-in-meta-learning-based-multilingual-speech-synthesis-2209.06789"/></url>
<url><loc>https://scifaro.com/en/abs/essumm-extractive-speech-summarization-from-untranscribed-meeting-2209.06913</loc><lastmod>2022-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/essumm-extractive-speech-summarization-from-untranscribed-meeting-2209.06913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/essumm-extractive-speech-summarization-from-untranscribed-meeting-2209.06913"/></url>
<url><loc>https://scifaro.com/en/abs/open-challenges-in-synthetic-speech-detection-2209.07180</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-challenges-in-synthetic-speech-detection-2209.07180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-challenges-in-synthetic-speech-detection-2209.07180"/></url>
<url><loc>https://scifaro.com/en/abs/environment-classification-via-blind-roomprints-estimation-2209.07196</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/environment-classification-via-blind-roomprints-estimation-2209.07196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/environment-classification-via-blind-roomprints-estimation-2209.07196"/></url>
<url><loc>https://scifaro.com/en/abs/an-automatic-speech-recognition-system-for-bengali-language-based-on-wav2vec2-and-transfer-learning-2209.08119</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automatic-speech-recognition-system-for-bengali-language-based-on-wav2vec2-and-transfer-learning-2209.08119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automatic-speech-recognition-system-for-bengali-language-based-on-wav2vec2-and-transfer-learning-2209.08119"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-conformers-via-sharing-sparsely-gated-experts-for-end-to-end-speech-recognition-2209.08326</loc><lastmod>2022-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-conformers-via-sharing-sparsely-gated-experts-for-end-to-end-speech-recognition-2209.08326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-conformers-via-sharing-sparsely-gated-experts-for-end-to-end-speech-recognition-2209.08326"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-strategies-to-model-pathological-speech-effect-of-multiple-spectral-resolutions-2209.08379</loc><lastmod>2022-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-strategies-to-model-pathological-speech-effect-of-multiple-spectral-resolutions-2209.08379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-strategies-to-model-pathological-speech-effect-of-multiple-spectral-resolutions-2209.08379"/></url>
<url><loc>https://scifaro.com/en/abs/a-combined-model-for-noise-reduction-of-lung-sound-signals-based-on-empirical-mode-decomposition-and-artificial-neural-network-2209.09512</loc><lastmod>2022-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-combined-model-for-noise-reduction-of-lung-sound-signals-based-on-empirical-mode-decomposition-and-artificial-neural-network-2209.09512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-combined-model-for-noise-reduction-of-lung-sound-signals-based-on-empirical-mode-decomposition-and-artificial-neural-network-2209.09512"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-onnx-bridging-a-gap-between-research-and-production-2209.09756</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-onnx-bridging-a-gap-between-research-and-production-2209.09756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-onnx-bridging-a-gap-between-research-and-production-2209.09756"/></url>
<url><loc>https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2209.09967</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2209.09967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-based-audio-retrieval-task-in-dcase-2022-challenge-2209.09967"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-star-gans-for-voice-conversion-with-contrastive-discriminator-2209.10088</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-star-gans-for-voice-conversion-with-contrastive-discriminator-2209.10088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-star-gans-for-voice-conversion-with-contrastive-discriminator-2209.10088"/></url>
<url><loc>https://scifaro.com/en/abs/the-returnzero-system-for-voxceleb-speaker-recognition-challenge-2022-2209.10147</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-returnzero-system-for-voxceleb-speaker-recognition-challenge-2022-2209.10147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-returnzero-system-for-voxceleb-speaker-recognition-challenge-2022-2209.10147"/></url>
<url><loc>https://scifaro.com/en/abs/gist-aiter-system-for-the-diarization-task-of-the-2022-voxceleb-speaker-recognition-challenge-2209.10357</loc><lastmod>2022-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gist-aiter-system-for-the-diarization-task-of-the-2022-voxceleb-speaker-recognition-challenge-2209.10357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gist-aiter-system-for-the-diarization-task-of-the-2022-voxceleb-speaker-recognition-challenge-2209.10357"/></url>
<url><loc>https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-with-denoising-diffusion-probabilistic-wasserstein-gan-2209.10446</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-with-denoising-diffusion-probabilistic-wasserstein-gan-2209.10446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mandarin-singing-voice-synthesis-with-denoising-diffusion-probabilistic-wasserstein-gan-2209.10446"/></url>
<url><loc>https://scifaro.com/en/abs/an-initial-study-on-birdsong-re-synthesis-using-neural-vocoders-2209.10479</loc><lastmod>2022-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-initial-study-on-birdsong-re-synthesis-using-neural-vocoders-2209.10479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-initial-study-on-birdsong-re-synthesis-using-neural-vocoders-2209.10479"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-asr-model-quality-on-disordered-speech-using-bertscore-2209.10591</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-asr-model-quality-on-disordered-speech-using-bertscore-2209.10591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-asr-model-quality-on-disordered-speech-using-bertscore-2209.10591"/></url>
<url><loc>https://scifaro.com/en/abs/epic-tts-models-empirical-pruning-investigations-characterizing-text-to-speech-models-2209.10890</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/epic-tts-models-empirical-pruning-investigations-characterizing-text-to-speech-models-2209.10890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/epic-tts-models-empirical-pruning-investigations-characterizing-text-to-speech-models-2209.10890"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-voice-activity-detection-with-self-supervised-representations-2209.11061</loc><lastmod>2022-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-voice-activity-detection-with-self-supervised-representations-2209.11061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-voice-activity-detection-with-self-supervised-representations-2209.11061"/></url>
<url><loc>https://scifaro.com/en/abs/isolation-performance-metrics-for-personal-sound-zone-reproduction-systems-2209.11296</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/isolation-performance-metrics-for-personal-sound-zone-reproduction-systems-2209.11296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/isolation-performance-metrics-for-personal-sound-zone-reproduction-systems-2209.11296"/></url>
<url><loc>https://scifaro.com/en/abs/the-kriston-ai-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.11433</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-kriston-ai-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.11433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-kriston-ai-system-for-the-voxceleb-speaker-recognition-challenge-2022-2209.11433"/></url>
<url><loc>https://scifaro.com/en/abs/mms-msg-a-multi-purpose-multi-speaker-mixture-signal-generator-2209.11494</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mms-msg-a-multi-purpose-multi-speaker-mixture-signal-generator-2209.11494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mms-msg-a-multi-purpose-multi-speaker-mixture-signal-generator-2209.11494"/></url>
<url><loc>https://scifaro.com/en/abs/stereo-inse-net-stereo-audio-quality-predictor-transfer-learned-from-mono-inse-net-2209.11666</loc><lastmod>2022-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stereo-inse-net-stereo-audio-quality-predictor-transfer-learned-from-mono-inse-net-2209.11666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stereo-inse-net-stereo-audio-quality-predictor-transfer-learned-from-mono-inse-net-2209.11666"/></url>
<url><loc>https://scifaro.com/en/abs/controlvc-zero-shot-voice-conversion-with-time-varying-controls-on-pitch-and-speed-2209.11866</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlvc-zero-shot-voice-conversion-with-time-varying-controls-on-pitch-and-speed-2209.11866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlvc-zero-shot-voice-conversion-with-time-varying-controls-on-pitch-and-speed-2209.11866"/></url>
<url><loc>https://scifaro.com/en/abs/nwpu-aslp-system-for-the-voiceprivacy-2022-challenge-2209.11969</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nwpu-aslp-system-for-the-voiceprivacy-2022-challenge-2209.11969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nwpu-aslp-system-for-the-voiceprivacy-2022-challenge-2209.11969"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-aware-speaker-diarization-for-multi-channel-multi-party-meeting-2209.12002</loc><lastmod>2022-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-aware-speaker-diarization-for-multi-channel-multi-party-meeting-2209.12002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-aware-speaker-diarization-for-multi-channel-multi-party-meeting-2209.12002"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-self-supervised-learning-2209.12702</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-self-supervised-learning-2209.12702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-lyrics-recognition-with-self-supervised-learning-2209.12702"/></url>
<url><loc>https://scifaro.com/en/abs/multi-encoder-attention-based-architectures-for-sound-recognition-with-partial-visual-assistance-2209.12826</loc><lastmod>2022-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-encoder-attention-based-architectures-for-sound-recognition-with-partial-visual-assistance-2209.12826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-encoder-attention-based-architectures-for-sound-recognition-with-partial-visual-assistance-2209.12826"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-temporal-resolution-on-convolutional-recurrent-networks-for-audio-tagging-and-sound-event-detection-2209.12843</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-temporal-resolution-on-convolutional-recurrent-networks-for-audio-tagging-and-sound-event-detection-2209.12843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-temporal-resolution-on-convolutional-recurrent-networks-for-audio-tagging-and-sound-event-detection-2209.12843"/></url>
<url><loc>https://scifaro.com/en/abs/automated-sex-classification-of-children-s-voices-and-changes-in-differentiating-factors-with-age-2209.13112</loc><lastmod>2022-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automated-sex-classification-of-children-s-voices-and-changes-in-differentiating-factors-with-age-2209.13112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automated-sex-classification-of-children-s-voices-and-changes-in-differentiating-factors-with-age-2209.13112"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-affective-vocal-bursts-with-finetuned-wav2vec-2-0-2209.13146</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-affective-vocal-bursts-with-finetuned-wav2vec-2-0-2209.13146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-affective-vocal-bursts-with-finetuned-wav2vec-2-0-2209.13146"/></url>
<url><loc>https://scifaro.com/en/abs/hyperbolic-timbre-embedding-for-musical-instrument-sound-synthesis-based-on-variational-autoencoders-2209.13211</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperbolic-timbre-embedding-for-musical-instrument-sound-synthesis-based-on-variational-autoencoders-2209.13211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperbolic-timbre-embedding-for-musical-instrument-sound-synthesis-based-on-variational-autoencoders-2209.13211"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-self-supervised-pre-trained-model-and-vector-quantization-2209.14150</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-self-supervised-pre-trained-model-and-vector-quantization-2209.14150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-self-supervised-pre-trained-model-and-vector-quantization-2209.14150"/></url>
<url><loc>https://scifaro.com/en/abs/audio-retrieval-with-wavtext5k-and-clap-training-2209.14275</loc><lastmod>2022-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-retrieval-with-wavtext5k-and-clap-training-2209.14275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-retrieval-with-wavtext5k-and-clap-training-2209.14275"/></url>
<url><loc>https://scifaro.com/en/abs/text-independent-speaker-identification-system-for-access-control-2209.14335</loc><lastmod>2022-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-independent-speaker-identification-system-for-access-control-2209.14335"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-independent-speaker-identification-system-for-access-control-2209.14335"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-prosodic-boundaries-in-speech-using-wav2vec-2-0-2209.15032</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-prosodic-boundaries-in-speech-using-wav2vec-2-0-2209.15032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-prosodic-boundaries-in-speech-using-wav2vec-2-0-2209.15032"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-with-band-split-rnn-2209.15174</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-with-band-split-rnn-2209.15174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-with-band-split-rnn-2209.15174"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-in-speech-emotion-recognition-using-bayesian-neural-networks-and-label-distribution-learning-2209.15449</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-in-speech-emotion-recognition-using-bayesian-neural-networks-and-label-distribution-learning-2209.15449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-label-uncertainty-modeling-in-speech-emotion-recognition-using-bayesian-neural-networks-and-label-distribution-learning-2209.15449"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-speech-enhancement-using-stoi-optimal-masks-2209.15472</loc><lastmod>2022-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-speech-enhancement-using-stoi-optimal-masks-2209.15472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-speech-enhancement-using-stoi-optimal-masks-2209.15472"/></url>
<url><loc>https://scifaro.com/en/abs/e-branchformer-branchformer-with-enhanced-merging-for-speech-recognition-2210.00077</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e-branchformer-branchformer-with-enhanced-merging-for-speech-recognition-2210.00077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e-branchformer-branchformer-with-enhanced-merging-for-speech-recognition-2210.00077"/></url>
<url><loc>https://scifaro.com/en/abs/blind-signal-dereverberation-for-machine-speech-recognition-2210.00117</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-signal-dereverberation-for-machine-speech-recognition-2210.00117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-signal-dereverberation-for-machine-speech-recognition-2210.00117"/></url>
<url><loc>https://scifaro.com/en/abs/pre-trained-speech-representations-as-feature-extractors-for-speech-quality-assessment-in-online-conferencing-applications-2210.00259</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-trained-speech-representations-as-feature-extractors-for-speech-quality-assessment-in-online-conferencing-applications-2210.00259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-trained-speech-representations-as-feature-extractors-for-speech-quality-assessment-in-online-conferencing-applications-2210.00259"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-wav2vec-for-vocal-burst-emotion-recognition-2210.00263</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-wav2vec-for-vocal-burst-emotion-recognition-2210.00263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-wav2vec-for-vocal-burst-emotion-recognition-2210.00263"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-transformer-convolutional-and-recurrent-neural-networks-on-phoneme-recognition-2210.00367</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-transformer-convolutional-and-recurrent-neural-networks-on-phoneme-recognition-2210.00367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-transformer-convolutional-and-recurrent-neural-networks-on-phoneme-recognition-2210.00367"/></url>
<url><loc>https://scifaro.com/en/abs/optimized-decoders-for-mixed-order-ambisonics-2210.00378</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimized-decoders-for-mixed-order-ambisonics-2210.00378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimized-decoders-for-mixed-order-ambisonics-2210.00378"/></url>
<url><loc>https://scifaro.com/en/abs/voice-spoofing-countermeasures-taxonomy-state-of-the-art-experimental-analysis-of-generalizability-open-challenges-and-the-way-forward-2210.00417</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-spoofing-countermeasures-taxonomy-state-of-the-art-experimental-analysis-of-generalizability-open-challenges-and-the-way-forward-2210.00417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-spoofing-countermeasures-taxonomy-state-of-the-art-experimental-analysis-of-generalizability-open-challenges-and-the-way-forward-2210.00417"/></url>
<url><loc>https://scifaro.com/en/abs/music-to-text-synaesthesia-generating-descriptive-text-from-music-recordings-2210.00434</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-to-text-synaesthesia-generating-descriptive-text-from-music-recordings-2210.00434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-to-text-synaesthesia-generating-descriptive-text-from-music-recordings-2210.00434"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-acoustic-frontends-in-bird-activity-detection-2210.00889</loc><lastmod>2022-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-acoustic-frontends-in-bird-activity-detection-2210.00889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-acoustic-frontends-in-bird-activity-detection-2210.00889"/></url>
<url><loc>https://scifaro.com/en/abs/simple-pooling-front-ends-for-efficient-audio-classification-2210.00943</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simple-pooling-front-ends-for-efficient-audio-classification-2210.00943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simple-pooling-front-ends-for-efficient-audio-classification-2210.00943"/></url>
<url><loc>https://scifaro.com/en/abs/an-attention-based-backend-allowing-efficient-fine-tuning-of-transformer-models-for-speaker-verification-2210.01273</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attention-based-backend-allowing-efficient-fine-tuning-of-transformer-models-for-speaker-verification-2210.01273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attention-based-backend-allowing-efficient-fine-tuning-of-transformer-models-for-speaker-verification-2210.01273"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-dukeece-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.01677</loc><lastmod>2022-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-dukeece-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.01677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-dukeece-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2022-2210.01677"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-hysteresis-comparator-and-rms-threshold-methods-for-automatic-single-cough-segmentations-2210.02057</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-hysteresis-comparator-and-rms-threshold-methods-for-automatic-single-cough-segmentations-2210.02057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-hysteresis-comparator-and-rms-threshold-methods-for-automatic-single-cough-segmentations-2210.02057"/></url>
<url><loc>https://scifaro.com/en/abs/exploration-of-a-self-supervised-speech-model-a-study-on-emotional-corpora-2210.02595</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploration-of-a-self-supervised-speech-model-a-study-on-emotional-corpora-2210.02595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploration-of-a-self-supervised-speech-model-a-study-on-emotional-corpora-2210.02595"/></url>
<url><loc>https://scifaro.com/en/abs/fully-unsupervised-training-of-few-shot-keyword-spotting-2210.02732</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-unsupervised-training-of-few-shot-keyword-spotting-2210.02732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-unsupervised-training-of-few-shot-keyword-spotting-2210.02732"/></url>
<url><loc>https://scifaro.com/en/abs/mutual-learning-of-single-and-multi-channel-end-to-end-neural-diarization-2210.03459</loc><lastmod>2022-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mutual-learning-of-single-and-multi-channel-end-to-end-neural-diarization-2210.03459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mutual-learning-of-single-and-multi-channel-end-to-end-neural-diarization-2210.03459"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-voice-detection-and-audio-splicing-detection-using-se-res2net-conformer-architecture-2210.03581</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-voice-detection-and-audio-splicing-detection-using-se-res2net-conformer-architecture-2210.03581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-voice-detection-and-audio-splicing-detection-using-se-res2net-conformer-architecture-2210.03581"/></url>
<url><loc>https://scifaro.com/en/abs/cross-dataset-covid-19-transfer-learning-with-cough-detection-cough-segmentation-and-data-augmentation-2210.05843</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-dataset-covid-19-transfer-learning-with-cough-detection-cough-segmentation-and-data-augmentation-2210.05843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-dataset-covid-19-transfer-learning-with-cough-detection-cough-segmentation-and-data-augmentation-2210.05843"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-speaker-consistency-learning-using-untranscribed-speech-data-for-zero-shot-multi-speaker-text-to-speech-2210.05979</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-speaker-consistency-learning-using-untranscribed-speech-data-for-zero-shot-multi-speaker-text-to-speech-2210.05979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-speaker-consistency-learning-using-untranscribed-speech-data-for-zero-shot-multi-speaker-text-to-speech-2210.05979"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-efficient-tuning-methods-in-self-supervised-speech-models-2210.06175</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-efficient-tuning-methods-in-self-supervised-speech-models-2210.06175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-efficient-tuning-methods-in-self-supervised-speech-models-2210.06175"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-use-common-voice-to-train-a-multi-speaker-tts-system-2210.06370</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-use-common-voice-to-train-a-multi-speaker-tts-system-2210.06370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-use-common-voice-to-train-a-multi-speaker-tts-system-2210.06370"/></url>
<url><loc>https://scifaro.com/en/abs/an-ensemble-teacher-student-learning-approach-with-poisson-sub-sampling-to-differential-privacy-preserving-speech-recognition-2210.06382</loc><lastmod>2022-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ensemble-teacher-student-learning-approach-with-poisson-sub-sampling-to-differential-privacy-preserving-speech-recognition-2210.06382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ensemble-teacher-student-learning-approach-with-poisson-sub-sampling-to-differential-privacy-preserving-speech-recognition-2210.06382"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-method-for-metric-level-switching-in-beat-tracking-2210.06817</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-method-for-metric-level-switching-in-beat-tracking-2210.06817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-method-for-metric-level-switching-in-beat-tracking-2210.06817"/></url>
<url><loc>https://scifaro.com/en/abs/deepfake-detection-system-for-the-add-challenge-track-3-2-based-on-score-fusion-2210.06818</loc><lastmod>2022-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfake-detection-system-for-the-add-challenge-track-3-2-based-on-score-fusion-2210.06818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfake-detection-system-for-the-add-challenge-track-3-2-based-on-score-fusion-2210.06818"/></url>
<url><loc>https://scifaro.com/en/abs/transfusion-transcribing-speech-with-multinomial-diffusion-2210.07677</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfusion-transcribing-speech-with-multinomial-diffusion-2210.07677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfusion-transcribing-speech-with-multinomial-diffusion-2210.07677"/></url>
<url><loc>https://scifaro.com/en/abs/levoice-asr-systems-for-the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-2210.07749</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/levoice-asr-systems-for-the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-2210.07749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/levoice-asr-systems-for-the-iscslp-2022-intelligent-cockpit-speech-recognition-challenge-2210.07749"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-jointly-transcribe-and-subtitle-for-end-to-end-spontaneous-speech-recognition-2210.07771</loc><lastmod>2022-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-jointly-transcribe-and-subtitle-for-end-to-end-spontaneous-speech-recognition-2210.07771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-jointly-transcribe-and-subtitle-for-end-to-end-spontaneous-speech-recognition-2210.07771"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-analysis-of-novelties-introduced-in-dcase-task-4-2022-on-the-baseline-system-2210.07856</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-analysis-of-novelties-introduced-in-dcase-task-4-2022-on-the-baseline-system-2210.07856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-analysis-of-novelties-introduced-in-dcase-task-4-2022-on-the-baseline-system-2210.07856"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-two-dimensional-sound-source-localization-with-ad-hoc-microphone-arrays-2210.08484</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-two-dimensional-sound-source-localization-with-ad-hoc-microphone-arrays-2210.08484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-two-dimensional-sound-source-localization-with-ad-hoc-microphone-arrays-2210.08484"/></url>
<url><loc>https://scifaro.com/en/abs/ctcbert-advancing-hidden-unit-bert-with-ctc-objectives-2210.08603</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctcbert-advancing-hidden-unit-bert-with-ctc-objectives-2210.08603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctcbert-advancing-hidden-unit-bert-with-ctc-objectives-2210.08603"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-audio-embeddings-for-query-by-example-2210.08624</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-audio-embeddings-for-query-by-example-2210.08624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-audio-embeddings-for-query-by-example-2210.08624"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-aware-non-autoregressive-spell-correction-with-mask-sample-decoding-2210.08665</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-aware-non-autoregressive-spell-correction-with-mask-sample-decoding-2210.08665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-aware-non-autoregressive-spell-correction-with-mask-sample-decoding-2210.08665"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-dccrn-dccrn-equipped-with-frame-level-angle-feature-and-hybrid-filtering-for-multi-channel-speech-enhancement-2210.08802</loc><lastmod>2022-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-dccrn-dccrn-equipped-with-frame-level-angle-feature-and-hybrid-filtering-for-multi-channel-speech-enhancement-2210.08802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-dccrn-dccrn-equipped-with-frame-level-angle-feature-and-hybrid-filtering-for-multi-channel-speech-enhancement-2210.08802"/></url>
<url><loc>https://scifaro.com/en/abs/torchdiva-an-extensible-computational-model-of-speech-production-built-on-an-open-source-machine-learning-library-2210.09334</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/torchdiva-an-extensible-computational-model-of-speech-production-built-on-an-open-source-machine-learning-library-2210.09334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/torchdiva-an-extensible-computational-model-of-speech-production-built-on-an-open-source-machine-learning-library-2210.09334"/></url>
<url><loc>https://scifaro.com/en/abs/extracting-speaker-and-emotion-information-from-self-supervised-speech-models-via-channel-wise-correlations-2210.09513</loc><lastmod>2022-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extracting-speaker-and-emotion-information-from-self-supervised-speech-models-via-channel-wise-correlations-2210.09513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extracting-speaker-and-emotion-information-from-self-supervised-speech-models-via-channel-wise-correlations-2210.09513"/></url>
<url><loc>https://scifaro.com/en/abs/risk-of-re-identification-for-shared-clinical-speech-recordings-2210.09975</loc><lastmod>2023-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/risk-of-re-identification-for-shared-clinical-speech-recordings-2210.09975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/risk-of-re-identification-for-shared-clinical-speech-recordings-2210.09975"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-temporal-resolution-of-convolutional-recurrent-neural-networks-for-sound-event-detection-2210.10208</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-temporal-resolution-of-convolutional-recurrent-neural-networks-for-sound-event-detection-2210.10208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-temporal-resolution-of-convolutional-recurrent-neural-networks-for-sound-event-detection-2210.10208"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-transformer-architectures-for-audiovisual-scene-classification-2210.10212</loc><lastmod>2022-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-transformer-architectures-for-audiovisual-scene-classification-2210.10212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-transformer-architectures-for-audiovisual-scene-classification-2210.10212"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-stage-wise-two-dimensional-speaker-localization-with-large-ad-hoc-microphone-arrays-2210.10265</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-stage-wise-two-dimensional-speaker-localization-with-large-ad-hoc-microphone-arrays-2210.10265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-stage-wise-two-dimensional-speaker-localization-with-large-ad-hoc-microphone-arrays-2210.10265"/></url>
<url><loc>https://scifaro.com/en/abs/spoofed-training-data-for-speech-spoofing-countermeasure-can-be-efficiently-created-using-neural-vocoders-2210.10570</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofed-training-data-for-speech-spoofing-countermeasure-can-be-efficiently-created-using-neural-vocoders-2210.10570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofed-training-data-for-speech-spoofing-countermeasure-can-be-efficiently-created-using-neural-vocoders-2210.10570"/></url>
<url><loc>https://scifaro.com/en/abs/disc-vc-disentangled-and-f0-controllable-neural-voice-conversion-2210.11059</loc><lastmod>2022-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disc-vc-disentangled-and-f0-controllable-neural-voice-conversion-2210.11059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disc-vc-disentangled-and-f0-controllable-neural-voice-conversion-2210.11059"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2210.11089</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2210.11089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dereverberation-with-a-reverberation-time-shortening-target-2210.11089"/></url>
<url><loc>https://scifaro.com/en/abs/model-matching-principle-applied-to-the-design-of-an-array-based-all-neural-binaural-rendering-system-for-audio-telepresence-2210.11123</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-matching-principle-applied-to-the-design-of-an-array-based-all-neural-binaural-rendering-system-for-audio-telepresence-2210.11123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-matching-principle-applied-to-the-design-of-an-array-based-all-neural-binaural-rendering-system-for-audio-telepresence-2210.11123"/></url>
<url><loc>https://scifaro.com/en/abs/discriminatory-and-orthogonal-feature-learning-for-noise-robust-keyword-spotting-2210.11519</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminatory-and-orthogonal-feature-learning-for-noise-robust-keyword-spotting-2210.11519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminatory-and-orthogonal-feature-learning-for-noise-robust-keyword-spotting-2210.11519"/></url>
<url><loc>https://scifaro.com/en/abs/anchored-speech-recognition-with-neural-transducers-2210.11588</loc><lastmod>2023-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anchored-speech-recognition-with-neural-transducers-2210.11588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anchored-speech-recognition-with-neural-transducers-2210.11588"/></url>
<url><loc>https://scifaro.com/en/abs/improved-normalizing-flow-based-speech-enhancement-using-an-all-pole-gammatone-filterbank-for-conditional-input-representation-2210.11654</loc><lastmod>2022-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-normalizing-flow-based-speech-enhancement-using-an-all-pole-gammatone-filterbank-for-conditional-input-representation-2210.11654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-normalizing-flow-based-speech-enhancement-using-an-all-pole-gammatone-filterbank-for-conditional-input-representation-2210.11654"/></url>
<url><loc>https://scifaro.com/en/abs/evidence-of-vocal-tract-articulation-in-self-supervised-learning-of-speech-2210.11723</loc><lastmod>2023-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evidence-of-vocal-tract-articulation-in-self-supervised-learning-of-speech-2210.11723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evidence-of-vocal-tract-articulation-in-self-supervised-learning-of-speech-2210.11723"/></url>
<url><loc>https://scifaro.com/en/abs/deep-domain-adaptation-for-polyphonic-melody-extraction-2210.12532</loc><lastmod>2023-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-domain-adaptation-for-polyphonic-melody-extraction-2210.12532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-domain-adaptation-for-polyphonic-melody-extraction-2210.12532"/></url>
<url><loc>https://scifaro.com/en/abs/chowdsp-wdf-an-advanced-c-library-for-wave-digital-circuit-modelling-2210.12554</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chowdsp-wdf-an-advanced-c-library-for-wave-digital-circuit-modelling-2210.12554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chowdsp-wdf-an-advanced-c-library-for-wave-digital-circuit-modelling-2210.12554"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-identification-from-emotional-and-noisy-speech-data-using-learned-voice-segregation-and-speech-vgg-2210.12701</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-identification-from-emotional-and-noisy-speech-data-using-learned-voice-segregation-and-speech-vgg-2210.12701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-identification-from-emotional-and-noisy-speech-data-using-learned-voice-segregation-and-speech-vgg-2210.12701"/></url>
<url><loc>https://scifaro.com/en/abs/hifi-wavegan-generative-adversarial-network-with-auxiliary-spectrogram-phase-loss-for-high-fidelity-singing-voice-generation-2210.12740</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hifi-wavegan-generative-adversarial-network-with-auxiliary-spectrogram-phase-loss-for-high-fidelity-singing-voice-generation-2210.12740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hifi-wavegan-generative-adversarial-network-with-auxiliary-spectrogram-phase-loss-for-high-fidelity-singing-voice-generation-2210.12740"/></url>
<url><loc>https://scifaro.com/en/abs/tridentse-guiding-speech-enhancement-with-32-global-tokens-2210.12995</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tridentse-guiding-speech-enhancement-with-32-global-tokens-2210.12995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tridentse-guiding-speech-enhancement-with-32-global-tokens-2210.12995"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-frame-structure-for-cloud-based-audio-visual-speech-enhancement-in-multimodal-hearing-aids-2210.13127</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-frame-structure-for-cloud-based-audio-visual-speech-enhancement-in-multimodal-hearing-aids-2210.13127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-frame-structure-for-cloud-based-audio-visual-speech-enhancement-in-multimodal-hearing-aids-2210.13127"/></url>
<url><loc>https://scifaro.com/en/abs/weak-supervised-dysarthria-invariant-features-for-spoken-language-understanding-using-an-fhvae-and-adversarial-training-2210.13144</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weak-supervised-dysarthria-invariant-features-for-spoken-language-understanding-using-an-fhvae-and-adversarial-training-2210.13144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weak-supervised-dysarthria-invariant-features-for-spoken-language-understanding-using-an-fhvae-and-adversarial-training-2210.13144"/></url>
<url><loc>https://scifaro.com/en/abs/can-visual-context-improve-automatic-speech-recognition-for-an-embodied-agent-2210.13189</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-visual-context-improve-automatic-speech-recognition-for-an-embodied-agent-2210.13189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-visual-context-improve-automatic-speech-recognition-for-an-embodied-agent-2210.13189"/></url>
<url><loc>https://scifaro.com/en/abs/brouhaha-multi-task-training-for-voice-activity-detection-speech-to-noise-ratio-and-c50-room-acoustics-estimation-2210.13248</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/brouhaha-multi-task-training-for-voice-activity-detection-speech-to-noise-ratio-and-c50-room-acoustics-estimation-2210.13248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/brouhaha-multi-task-training-for-voice-activity-detection-speech-to-noise-ratio-and-c50-room-acoustics-estimation-2210.13248"/></url>
<url><loc>https://scifaro.com/en/abs/an-acoustical-machine-learning-approach-to-determine-abrasive-belt-wear-of-wide-belt-sanders-2210.13273</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-acoustical-machine-learning-approach-to-determine-abrasive-belt-wear-of-wide-belt-sanders-2210.13273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-acoustical-machine-learning-approach-to-determine-abrasive-belt-wear-of-wide-belt-sanders-2210.13273"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-speech-enhancement-for-robust-automatic-speech-recognition-2210.13318</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-for-robust-automatic-speech-recognition-2210.13318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-speech-enhancement-for-robust-automatic-speech-recognition-2210.13318"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-neural-audio-compression-2210.13438</loc><lastmod>2022-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-neural-audio-compression-2210.13438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-neural-audio-compression-2210.13438"/></url>
<url><loc>https://scifaro.com/en/abs/highly-efficient-real-time-streaming-and-fully-on-device-speaker-diarization-with-multi-stage-clustering-2210.13690</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/highly-efficient-real-time-streaming-and-fully-on-device-speaker-diarization-with-multi-stage-clustering-2210.13690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/highly-efficient-real-time-streaming-and-fully-on-device-speaker-diarization-with-multi-stage-clustering-2210.13690"/></url>
<url><loc>https://scifaro.com/en/abs/does-joint-training-really-help-cascaded-speech-translation-2210.13700</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-joint-training-really-help-cascaded-speech-translation-2210.13700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-joint-training-really-help-cascaded-speech-translation-2210.13700"/></url>
<url><loc>https://scifaro.com/en/abs/mixed-evc-mixed-emotion-synthesis-and-control-in-voice-conversion-2210.13756</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixed-evc-mixed-emotion-synthesis-and-control-in-voice-conversion-2210.13756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixed-evc-mixed-emotion-synthesis-and-control-in-voice-conversion-2210.13756"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-parrotron-for-on-device-speech-to-speech-conversion-2210.13761</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-parrotron-for-on-device-speech-to-speech-conversion-2210.13761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-parrotron-for-on-device-speech-to-speech-conversion-2210.13761"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-speech-representation-learning-for-one-shot-cross-lingual-voice-conversion-using-beta-vae-2210.13771</loc><lastmod>2022-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-speech-representation-learning-for-one-shot-cross-lingual-voice-conversion-using-beta-vae-2210.13771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-speech-representation-learning-for-one-shot-cross-lingual-voice-conversion-using-beta-vae-2210.13771"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-fuzzy-decomposition-of-sound-into-sines-transients-and-noise-2210.14041</loc><lastmod>2022-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-fuzzy-decomposition-of-sound-into-sines-transients-and-noise-2210.14041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-fuzzy-decomposition-of-sound-into-sines-transients-and-noise-2210.14041"/></url>
<url><loc>https://scifaro.com/en/abs/eben-extreme-bandwidth-extension-network-applied-to-speech-signals-captured-with-noise-resilient-body-conduction-microphones-2210.14090</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eben-extreme-bandwidth-extension-network-applied-to-speech-signals-captured-with-noise-resilient-body-conduction-microphones-2210.14090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eben-extreme-bandwidth-extension-network-applied-to-speech-signals-captured-with-noise-resilient-body-conduction-microphones-2210.14090"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-asmr-a-cyber-psychological-approach-2210.14321</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-asmr-a-cyber-psychological-approach-2210.14321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-asmr-a-cyber-psychological-approach-2210.14321"/></url>
<url><loc>https://scifaro.com/en/abs/redpen-region-and-reason-annotated-dataset-of-unnatural-speech-2210.14406</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/redpen-region-and-reason-annotated-dataset-of-unnatural-speech-2210.14406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/redpen-region-and-reason-annotated-dataset-of-unnatural-speech-2210.14406"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-different-splitting-criteria-on-the-performance-of-speech-emotion-recognition-2210.14501</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-different-splitting-criteria-on-the-performance-of-speech-emotion-recognition-2210.14501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-different-splitting-criteria-on-the-performance-of-speech-emotion-recognition-2210.14501"/></url>
<url><loc>https://scifaro.com/en/abs/ufo2-a-unified-pre-training-framework-for-online-and-offline-speech-recognition-2210.14515</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ufo2-a-unified-pre-training-framework-for-online-and-offline-speech-recognition-2210.14515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ufo2-a-unified-pre-training-framework-for-online-and-offline-speech-recognition-2210.14515"/></url>
<url><loc>https://scifaro.com/en/abs/position-tracking-of-a-varying-number-of-sound-sources-with-sliding-permutation-invariant-training-2210.14536</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/position-tracking-of-a-varying-number-of-sound-sources-with-sliding-permutation-invariant-training-2210.14536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/position-tracking-of-a-varying-number-of-sound-sources-with-sliding-permutation-invariant-training-2210.14536"/></url>
<url><loc>https://scifaro.com/en/abs/adams-deep-metric-learning-with-adaptive-margin-and-adaptive-scale-for-acoustic-word-discrimination-2210.14564</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adams-deep-metric-learning-with-adaptive-margin-and-adaptive-scale-for-acoustic-word-discrimination-2210.14564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adams-deep-metric-learning-with-adaptive-margin-and-adaptive-scale-for-acoustic-word-discrimination-2210.14564"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-language-confusion-for-code-switching-speech-recognition-with-token-level-language-diarization-2210.14567</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-language-confusion-for-code-switching-speech-recognition-with-token-level-language-diarization-2210.14567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-language-confusion-for-code-switching-speech-recognition-with-token-level-language-diarization-2210.14567"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-audio-visual-multi-speaker-doa-estimation-using-permutation-free-loss-function-2210.14581</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-audio-visual-multi-speaker-doa-estimation-using-permutation-free-loss-function-2210.14581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-audio-visual-multi-speaker-doa-estimation-using-permutation-free-loss-function-2210.14581"/></url>
<url><loc>https://scifaro.com/en/abs/masked-modeling-duo-learning-representations-by-encouraging-both-networks-to-model-the-input-2210.14648</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-modeling-duo-learning-representations-by-encouraging-both-networks-to-model-the-input-2210.14648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-modeling-duo-learning-representations-by-encouraging-both-networks-to-model-the-input-2210.14648"/></url>
<url><loc>https://scifaro.com/en/abs/xiaoicesing-2-a-high-fidelity-singing-voice-synthesizer-based-on-generative-adversarial-network-2210.14666</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xiaoicesing-2-a-high-fidelity-singing-voice-synthesizer-based-on-generative-adversarial-network-2210.14666"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xiaoicesing-2-a-high-fidelity-singing-voice-synthesizer-based-on-generative-adversarial-network-2210.14666"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-pressure-matching-based-on-kernel-interpolation-for-sound-field-reproduction-2210.14711</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-pressure-matching-based-on-kernel-interpolation-for-sound-field-reproduction-2210.14711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-pressure-matching-based-on-kernel-interpolation-for-sound-field-reproduction-2210.14711"/></url>
<url><loc>https://scifaro.com/en/abs/naturalistic-head-motion-generation-from-speech-2210.14800</loc><lastmod>2022-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturalistic-head-motion-generation-from-speech-2210.14800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturalistic-head-motion-generation-from-speech-2210.14800"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-preserving-automatic-speaker-diarization-2210.14995</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-preserving-automatic-speaker-diarization-2210.14995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-preserving-automatic-speaker-diarization-2210.14995"/></url>
<url><loc>https://scifaro.com/en/abs/acoustically-driven-phoneme-removal-that-preserves-vocal-affect-cues-2210.15001</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustically-driven-phoneme-removal-that-preserves-vocal-affect-cues-2210.15001"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustically-driven-phoneme-removal-that-preserves-vocal-affect-cues-2210.15001"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-voice-conversion-via-intermediate-bottleneck-features-and-non-streaming-teacher-guidance-2210.15158</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-voice-conversion-via-intermediate-bottleneck-features-and-non-streaming-teacher-guidance-2210.15158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-voice-conversion-via-intermediate-bottleneck-features-and-non-streaming-teacher-guidance-2210.15158"/></url>
<url><loc>https://scifaro.com/en/abs/masked-autoencoders-are-articulatory-learners-2210.15195</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-autoencoders-are-articulatory-learners-2210.15195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-autoencoders-are-articulatory-learners-2210.15195"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-field-unifying-measured-hrtf-magnitude-representation-with-neural-fields-2210.15196</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-field-unifying-measured-hrtf-magnitude-representation-with-neural-fields-2210.15196"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-field-unifying-measured-hrtf-magnitude-representation-with-neural-fields-2210.15196"/></url>
<url><loc>https://scifaro.com/en/abs/solving-audio-inverse-problems-with-a-diffusion-model-2210.15228</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/solving-audio-inverse-problems-with-a-diffusion-model-2210.15228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/solving-audio-inverse-problems-with-a-diffusion-model-2210.15228"/></url>
<url><loc>https://scifaro.com/en/abs/a-fast-and-accurate-pitch-estimation-algorithm-based-on-the-pseudo-wigner-ville-distribution-2210.15272</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fast-and-accurate-pitch-estimation-algorithm-based-on-the-pseudo-wigner-ville-distribution-2210.15272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fast-and-accurate-pitch-estimation-algorithm-based-on-the-pseudo-wigner-ville-distribution-2210.15272"/></url>
<url><loc>https://scifaro.com/en/abs/weight-averaging-a-simple-yet-effective-method-to-overcome-catastrophic-forgetting-in-automatic-speech-recognition-2210.15282</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weight-averaging-a-simple-yet-effective-method-to-overcome-catastrophic-forgetting-in-automatic-speech-recognition-2210.15282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weight-averaging-a-simple-yet-effective-method-to-overcome-catastrophic-forgetting-in-automatic-speech-recognition-2210.15282"/></url>
<url><loc>https://scifaro.com/en/abs/learning-music-representations-with-wav2vec-2-0-2210.15310</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-music-representations-with-wav2vec-2-0-2210.15310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-music-representations-with-wav2vec-2-0-2210.15310"/></url>
<url><loc>https://scifaro.com/en/abs/robust-data2vec-noise-robust-speech-representation-learning-for-asr-by-combining-regression-and-improved-contrastive-learning-2210.15324</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-data2vec-noise-robust-speech-representation-learning-for-asr-by-combining-regression-and-improved-contrastive-learning-2210.15324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-data2vec-noise-robust-speech-representation-learning-for-asr-by-combining-regression-and-improved-contrastive-learning-2210.15324"/></url>
<url><loc>https://scifaro.com/en/abs/multi-class-detection-of-pathological-speech-with-latent-features-how-does-it-perform-on-unseen-data-2210.15336</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-class-detection-of-pathological-speech-with-latent-features-how-does-it-perform-on-unseen-data-2210.15336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-class-detection-of-pathological-speech-with-latent-features-how-does-it-perform-on-unseen-data-2210.15336"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dimensional-edge-based-audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2210.15366</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dimensional-edge-based-audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2210.15366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dimensional-edge-based-audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2210.15366"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-training-of-speaker-encoder-with-multi-modal-diverse-positive-pairs-2210.15385</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-training-of-speaker-encoder-with-multi-modal-diverse-positive-pairs-2210.15385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-training-of-speaker-encoder-with-multi-modal-diverse-positive-pairs-2210.15385"/></url>
<url><loc>https://scifaro.com/en/abs/heimdal-highly-efficient-method-for-detection-and-localization-of-wake-words-2210.15425</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heimdal-highly-efficient-method-for-detection-and-localization-of-wake-words-2210.15425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heimdal-highly-efficient-method-for-detection-and-localization-of-wake-words-2210.15425"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-based-embeddings-for-spoofed-audio-representation-2210.15428</loc><lastmod>2022-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-based-embeddings-for-spoofed-audio-representation-2210.15428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-based-embeddings-for-spoofed-audio-representation-2210.15428"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-utilization-of-large-pre-trained-models-for-low-resource-asr-2210.15445</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-utilization-of-large-pre-trained-models-for-low-resource-asr-2210.15445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-utilization-of-large-pre-trained-models-for-low-resource-asr-2210.15445"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-spatial-information-with-the-informed-complex-valued-spatial-autoencoder-for-target-speaker-extraction-2210.15512</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-spatial-information-with-the-informed-complex-valued-spatial-autoencoder-for-target-speaker-extraction-2210.15512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-spatial-information-with-the-informed-complex-valued-spatial-autoencoder-for-target-speaker-extraction-2210.15512"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-effective-distillation-of-self-supervised-speech-models-for-automatic-speech-recognition-2210.15631</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-effective-distillation-of-self-supervised-speech-models-for-automatic-speech-recognition-2210.15631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-effective-distillation-of-self-supervised-speech-models-for-automatic-speech-recognition-2210.15631"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-realistic-speech-overlaps-improves-multi-talker-asr-2210.15715</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-realistic-speech-overlaps-improves-multi-talker-asr-2210.15715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-realistic-speech-overlaps-improves-multi-talker-asr-2210.15715"/></url>
<url><loc>https://scifaro.com/en/abs/proceedings-of-the-acii-affective-vocal-bursts-workshop-and-competition-2022-a-vb-understanding-a-critically-understudied-modality-of-emotional-expression-2210.15754</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proceedings-of-the-acii-affective-vocal-bursts-workshop-and-competition-2022-a-vb-understanding-a-critically-understudied-modality-of-emotional-expression-2210.15754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proceedings-of-the-acii-affective-vocal-bursts-workshop-and-competition-2022-a-vb-understanding-a-critically-understudied-modality-of-emotional-expression-2210.15754"/></url>
<url><loc>https://scifaro.com/en/abs/a-compact-end-to-end-model-with-local-and-global-context-for-spoken-language-identification-2210.15781</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-compact-end-to-end-model-with-local-and-global-context-for-spoken-language-identification-2210.15781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-compact-end-to-end-model-with-local-and-global-context-for-spoken-language-identification-2210.15781"/></url>
<url><loc>https://scifaro.com/en/abs/conditioning-and-sampling-in-variational-diffusion-models-for-speech-super-resolution-2210.15793</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditioning-and-sampling-in-variational-diffusion-models-for-speech-super-resolution-2210.15793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditioning-and-sampling-in-variational-diffusion-models-for-speech-super-resolution-2210.15793"/></url>
<url><loc>https://scifaro.com/en/abs/ux-net-filter-and-process-based-improved-u-net-for-real-time-time-domain-audio-separation-2210.15822</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ux-net-filter-and-process-based-improved-u-net-for-real-time-time-domain-audio-separation-2210.15822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ux-net-filter-and-process-based-improved-u-net-for-real-time-time-domain-audio-separation-2210.15822"/></url>
<url><loc>https://scifaro.com/en/abs/random-utterance-concatenation-based-data-augmentation-for-improving-short-video-speech-recognition-2210.15876</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/random-utterance-concatenation-based-data-augmentation-for-improving-short-video-speech-recognition-2210.15876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/random-utterance-concatenation-based-data-augmentation-for-improving-short-video-speech-recognition-2210.15876"/></url>
<url><loc>https://scifaro.com/en/abs/nonparallel-high-quality-audio-super-resolution-with-domain-adaptation-and-resampling-cyclegans-2210.15887</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nonparallel-high-quality-audio-super-resolution-with-domain-adaptation-and-resampling-cyclegans-2210.15887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nonparallel-high-quality-audio-super-resolution-with-domain-adaptation-and-resampling-cyclegans-2210.15887"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-recognition-with-two-step-multi-modal-deep-cleansing-2210.15903</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-recognition-with-two-step-multi-modal-deep-cleansing-2210.15903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-recognition-with-two-step-multi-modal-deep-cleansing-2210.15903"/></url>
<url><loc>https://scifaro.com/en/abs/influence-of-utterance-and-speaker-characteristics-on-the-classification-of-children-with-cleft-lip-and-palate-2210.15941</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/influence-of-utterance-and-speaker-characteristics-on-the-classification-of-children-with-cleft-lip-and-palate-2210.15941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/influence-of-utterance-and-speaker-characteristics-on-the-classification-of-children-with-cleft-lip-and-palate-2210.15941"/></url>
<url><loc>https://scifaro.com/en/abs/period-vits-variational-inference-with-explicit-pitch-modeling-for-end-to-end-emotional-speech-synthesis-2210.15964</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/period-vits-variational-inference-with-explicit-pitch-modeling-for-end-to-end-emotional-speech-synthesis-2210.15964"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/period-vits-variational-inference-with-explicit-pitch-modeling-for-end-to-end-emotional-speech-synthesis-2210.15964"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-and-high-fidelity-end-to-end-text-to-speech-with-multi-band-generation-and-inverse-short-time-fourier-transform-2210.15975</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-and-high-fidelity-end-to-end-text-to-speech-with-multi-band-generation-and-inverse-short-time-fourier-transform-2210.15975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-and-high-fidelity-end-to-end-text-to-speech-with-multi-band-generation-and-inverse-short-time-fourier-transform-2210.15975"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-ensemble-based-feature-selection-for-paralinguistics-tasks-2210.15978</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-ensemble-based-feature-selection-for-paralinguistics-tasks-2210.15978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-ensemble-based-feature-selection-for-paralinguistics-tasks-2210.15978"/></url>
<url><loc>https://scifaro.com/en/abs/dysfluencies-seldom-come-alone-detection-as-a-multi-label-problem-2210.15982</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dysfluencies-seldom-come-alone-detection-as-a-multi-label-problem-2210.15982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dysfluencies-seldom-come-alone-detection-as-a-multi-label-problem-2210.15982"/></url>
<url><loc>https://scifaro.com/en/abs/nnsvs-a-neural-network-based-singing-voice-synthesis-toolkit-2210.15987</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nnsvs-a-neural-network-based-singing-voice-synthesis-toolkit-2210.15987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nnsvs-a-neural-network-based-singing-voice-synthesis-toolkit-2210.15987"/></url>
<url><loc>https://scifaro.com/en/abs/sg-vad-stochastic-gates-based-speech-activity-detection-2210.16022</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sg-vad-stochastic-gates-based-speech-activity-detection-2210.16022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sg-vad-stochastic-gates-based-speech-activity-detection-2210.16022"/></url>
<url><loc>https://scifaro.com/en/abs/laugh-betrays-you-learning-robust-speaker-representation-from-speech-containing-non-verbal-fragments-2210.16028</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/laugh-betrays-you-learning-robust-speaker-representation-from-speech-containing-non-verbal-fragments-2210.16028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/laugh-betrays-you-learning-robust-speaker-representation-from-speech-containing-non-verbal-fragments-2210.16028"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-pre-trained-transformer-models-for-speaker-verification-using-adapters-2210.16032</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-pre-trained-transformer-models-for-speaker-verification-using-adapters-2210.16032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-pre-trained-transformer-models-for-speaker-verification-using-adapters-2210.16032"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-voice-activity-detection-via-sequence-to-sequence-prediction-2210.16127</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-via-sequence-to-sequence-prediction-2210.16127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-voice-activity-detection-via-sequence-to-sequence-prediction-2210.16127"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-utterance-training-for-automatic-speech-recognition-2210.16238</loc><lastmod>2022-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-utterance-training-for-automatic-speech-recognition-2210.16238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-utterance-training-for-automatic-speech-recognition-2210.16238"/></url>
<url><loc>https://scifaro.com/en/abs/visually-aware-audio-captioning-with-adaptive-audio-visual-attention-2210.16428</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-aware-audio-captioning-with-adaptive-audio-visual-attention-2210.16428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-aware-audio-captioning-with-adaptive-audio-visual-attention-2210.16428"/></url>
<url><loc>https://scifaro.com/en/abs/the-secret-source-incorporating-source-features-to-improve-acoustic-to-articulatory-speech-inversion-2210.16450</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-secret-source-incorporating-source-features-to-improve-acoustic-to-articulatory-speech-inversion-2210.16450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-secret-source-incorporating-source-features-to-improve-acoustic-to-articulatory-speech-inversion-2210.16450"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-compute-the-articulatory-representations-of-speech-with-the-mirrornet-2210.16454</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-compute-the-articulatory-representations-of-speech-with-the-mirrornet-2210.16454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-compute-the-articulatory-representations-of-speech-with-the-mirrornet-2210.16454"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-rnn-t-training-and-inference-using-ctc-guidance-2210.16481</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-rnn-t-training-and-inference-using-ctc-guidance-2210.16481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-rnn-t-training-and-inference-using-ctc-guidance-2210.16481"/></url>
<url><loc>https://scifaro.com/en/abs/articulatory-representation-learning-via-joint-factor-analysis-and-neural-matrix-factorization-2210.16498</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/articulatory-representation-learning-via-joint-factor-analysis-and-neural-matrix-factorization-2210.16498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/articulatory-representation-learning-via-joint-factor-analysis-and-neural-matrix-factorization-2210.16498"/></url>
<url><loc>https://scifaro.com/en/abs/application-of-knowledge-distillation-to-multi-task-speech-representation-learning-2210.16611</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/application-of-knowledge-distillation-to-multi-task-speech-representation-learning-2210.16611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/application-of-knowledge-distillation-to-multi-task-speech-representation-learning-2210.16611"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-speaker-representation-via-contrastive-learning-with-class-aware-attention-in-angular-space-2210.16622</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-speaker-representation-via-contrastive-learning-with-class-aware-attention-in-angular-space-2210.16622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-speaker-representation-via-contrastive-learning-with-class-aware-attention-in-angular-space-2210.16622"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-representation-learning-via-contrastive-loss-with-maximal-speaker-separability-2210.16636</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-representation-learning-via-contrastive-loss-with-maximal-speaker-separability-2210.16636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-representation-learning-via-contrastive-loss-with-maximal-speaker-separability-2210.16636"/></url>
<url><loc>https://scifaro.com/en/abs/bert-meets-ctc-new-formulation-of-end-to-end-speech-recognition-with-pre-trained-masked-language-model-2210.16663</loc><lastmod>2023-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bert-meets-ctc-new-formulation-of-end-to-end-speech-recognition-with-pre-trained-masked-language-model-2210.16663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bert-meets-ctc-new-formulation-of-end-to-end-speech-recognition-with-pre-trained-masked-language-model-2210.16663"/></url>
<url><loc>https://scifaro.com/en/abs/improvements-to-embedding-matching-acoustic-to-word-asr-using-multiple-hypothesis-pronunciation-based-embeddings-2210.16726</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvements-to-embedding-matching-acoustic-to-word-asr-using-multiple-hypothesis-pronunciation-based-embeddings-2210.16726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvements-to-embedding-matching-acoustic-to-word-asr-using-multiple-hypothesis-pronunciation-based-embeddings-2210.16726"/></url>
<url><loc>https://scifaro.com/en/abs/dude-dual-decoder-multilingual-asr-for-indian-languages-using-common-label-set-2210.16739</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dude-dual-decoder-multilingual-asr-for-indian-languages-using-common-label-set-2210.16739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dude-dual-decoder-multilingual-asr-for-indian-languages-using-common-label-set-2210.16739"/></url>
<url><loc>https://scifaro.com/en/abs/wekws-a-production-first-small-footprint-end-to-end-keyword-spotting-toolkit-2210.16743</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wekws-a-production-first-small-footprint-end-to-end-keyword-spotting-toolkit-2210.16743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wekws-a-production-first-small-footprint-end-to-end-keyword-spotting-toolkit-2210.16743"/></url>
<url><loc>https://scifaro.com/en/abs/improved-acoustic-to-articulatory-inversion-using-representations-from-pretrained-self-supervised-learning-models-2210.16871</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-acoustic-to-articulatory-inversion-using-representations-from-pretrained-self-supervised-learning-models-2210.16871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-acoustic-to-articulatory-inversion-using-representations-from-pretrained-self-supervised-learning-models-2210.16871"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-mri-video-synthesis-from-time-aligned-phonemes-with-sequence-to-sequence-networks-2210.16881</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-mri-video-synthesis-from-time-aligned-phonemes-with-sequence-to-sequence-networks-2210.16881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-mri-video-synthesis-from-time-aligned-phonemes-with-sequence-to-sequence-networks-2210.16881"/></url>
<url><loc>https://scifaro.com/en/abs/towards-developing-state-of-the-art-tts-synthesisers-for-13-indian-languages-with-signal-processing-aided-alignments-2210.17153</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-developing-state-of-the-art-tts-synthesisers-for-13-indian-languages-with-signal-processing-aided-alignments-2210.17153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-developing-state-of-the-art-tts-synthesisers-for-13-indian-languages-with-signal-processing-aided-alignments-2210.17153"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-processing-near-end-listening-enhancement-2210.17154</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-processing-near-end-listening-enhancement-2210.17154"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-processing-near-end-listening-enhancement-2210.17154"/></url>
<url><loc>https://scifaro.com/en/abs/diacorrect-end-to-end-error-correction-for-speaker-diarization-2210.17189</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diacorrect-end-to-end-error-correction-for-speaker-diarization-2210.17189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diacorrect-end-to-end-error-correction-for-speaker-diarization-2210.17189"/></url>
<url><loc>https://scifaro.com/en/abs/diffiner-a-versatile-diffusion-based-generative-refiner-for-speech-enhancement-2210.17287</loc><lastmod>2023-08-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffiner-a-versatile-diffusion-based-generative-refiner-for-speech-enhancement-2210.17287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffiner-a-versatile-diffusion-based-generative-refiner-for-speech-enhancement-2210.17287"/></url>
<url><loc>https://scifaro.com/en/abs/convolution-based-channel-frequency-attention-for-text-independent-speaker-verification-2210.17310</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolution-based-channel-frequency-attention-for-text-independent-speaker-verification-2210.17310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolution-based-channel-frequency-attention-for-text-independent-speaker-verification-2210.17310"/></url>
<url><loc>https://scifaro.com/en/abs/there-is-more-than-one-kind-of-robustness-fooling-whisper-with-adversarial-examples-2210.17316</loc><lastmod>2023-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/there-is-more-than-one-kind-of-robustness-fooling-whisper-with-adversarial-examples-2210.17316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/there-is-more-than-one-kind-of-robustness-fooling-whisper-with-adversarial-examples-2210.17316"/></url>
<url><loc>https://scifaro.com/en/abs/model-compression-for-dnn-based-speaker-verification-using-weight-quantization-2210.17326</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-compression-for-dnn-based-speaker-verification-using-weight-quantization-2210.17326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-compression-for-dnn-based-speaker-verification-using-weight-quantization-2210.17326"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-generative-speech-source-separation-2210.17327</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-generative-speech-source-separation-2210.17327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-generative-speech-source-separation-2210.17327"/></url>
<url><loc>https://scifaro.com/en/abs/voiceprivacy-2022-system-description-speaker-anonymization-with-feature-matched-f0-trajectories-2210.17338</loc><lastmod>2022-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceprivacy-2022-system-description-speaker-anonymization-with-feature-matched-f0-trajectories-2210.17338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceprivacy-2022-system-description-speaker-anonymization-with-feature-matched-f0-trajectories-2210.17338"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-and-separation-by-utilizing-multi-modal-self-supervised-embeddings-2210.17456</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-and-separation-by-utilizing-multi-modal-self-supervised-embeddings-2210.17456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-and-separation-by-utilizing-multi-modal-self-supervised-embeddings-2210.17456"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-degenerating-speech-due-to-progressive-dysarthria-on-asr-performance-2211.00089</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-degenerating-speech-due-to-progressive-dysarthria-on-asr-performance-2211.00089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-degenerating-speech-due-to-progressive-dysarthria-on-asr-performance-2211.00089"/></url>
<url><loc>https://scifaro.com/en/abs/imaginenet-target-speaker-extraction-with-intermittent-visual-cue-through-embedding-inpainting-2211.00109</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imaginenet-target-speaker-extraction-with-intermittent-visual-cue-through-embedding-inpainting-2211.00109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imaginenet-target-speaker-extraction-with-intermittent-visual-cue-through-embedding-inpainting-2211.00109"/></url>
<url><loc>https://scifaro.com/en/abs/waveform-boundary-detection-for-partially-spoofed-audio-2211.00226</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/waveform-boundary-detection-for-partially-spoofed-audio-2211.00226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/waveform-boundary-detection-for-partially-spoofed-audio-2211.00226"/></url>
<url><loc>https://scifaro.com/en/abs/speech-text-based-multi-modal-training-with-bidirectional-attention-for-improved-speech-recognition-2211.00325</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-text-based-multi-modal-training-with-bidirectional-attention-for-improved-speech-recognition-2211.00325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-text-based-multi-modal-training-with-bidirectional-attention-for-improved-speech-recognition-2211.00325"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-representation-learning-for-multilingual-speaker-recognition-2211.00437</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-representation-learning-for-multilingual-speaker-recognition-2211.00437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-representation-learning-for-multilingual-speaker-recognition-2211.00437"/></url>
<url><loc>https://scifaro.com/en/abs/metric-learning-for-user-defined-keyword-spotting-2211.00439</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metric-learning-for-user-defined-keyword-spotting-2211.00439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metric-learning-for-user-defined-keyword-spotting-2211.00439"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-self-supervised-models-to-multi-talker-speech-recognition-using-speaker-embeddings-2211.00482</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-self-supervised-models-to-multi-talker-speech-recognition-using-speaker-embeddings-2211.00482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-self-supervised-models-to-multi-talker-speech-recognition-using-speaker-embeddings-2211.00482"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-parallel-decoding-for-transducer-2211.00484</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-parallel-decoding-for-transducer-2211.00484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-parallel-decoding-for-transducer-2211.00484"/></url>
<url><loc>https://scifaro.com/en/abs/delay-penalized-transducer-for-low-latency-streaming-asr-2211.00490</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delay-penalized-transducer-for-low-latency-streaming-asr-2211.00490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delay-penalized-transducer-for-low-latency-streaming-asr-2211.00490"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-multi-codebook-vector-quantization-indexes-for-knowledge-distillation-2211.00508</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-multi-codebook-vector-quantization-indexes-for-knowledge-distillation-2211.00508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-multi-codebook-vector-quantization-indexes-for-knowledge-distillation-2211.00508"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-multichannel-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2211.00511</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-multichannel-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2211.00511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-multichannel-speaker-attributed-automatic-speech-recognition-in-multi-party-meetings-2211.00511"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-machine-learning-methods-2211.00569</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-machine-learning-methods-2211.00569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-bioacoustic-event-detection-with-machine-learning-methods-2211.00569"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-spherical-microphone-arrays-2211.00583</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-spherical-microphone-arrays-2211.00583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-spherical-microphone-arrays-2211.00583"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-equatorial-microphone-arrays-2211.00584</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-equatorial-microphone-arrays-2211.00584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonic-encoding-of-signals-from-equatorial-microphone-arrays-2211.00584"/></url>
<url><loc>https://scifaro.com/en/abs/adapter-based-extension-of-multi-speaker-text-to-speech-model-for-new-speakers-2211.00585</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapter-based-extension-of-multi-speaker-text-to-speech-model-for-new-speakers-2211.00585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapter-based-extension-of-multi-speaker-text-to-speech-model-for-new-speakers-2211.00585"/></url>
<url><loc>https://scifaro.com/en/abs/sca-streaming-cross-attention-alignment-for-echo-cancellation-2211.00589</loc><lastmod>2022-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sca-streaming-cross-attention-alignment-for-echo-cancellation-2211.00589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sca-streaming-cross-attention-alignment-for-echo-cancellation-2211.00589"/></url>
<url><loc>https://scifaro.com/en/abs/bectra-transducer-based-end-to-end-asr-with-bert-enhanced-encoder-2211.00792</loc><lastmod>2023-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bectra-transducer-based-end-to-end-asr-with-bert-enhanced-encoder-2211.00792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bectra-transducer-based-end-to-end-asr-with-bert-enhanced-encoder-2211.00792"/></url>
<url><loc>https://scifaro.com/en/abs/intermpl-momentum-pseudo-labeling-with-intermediate-ctc-loss-2211.00795</loc><lastmod>2023-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intermpl-momentum-pseudo-labeling-with-intermediate-ctc-loss-2211.00795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intermpl-momentum-pseudo-labeling-with-intermediate-ctc-loss-2211.00795"/></url>
<url><loc>https://scifaro.com/en/abs/lmd-a-learnable-mask-network-to-detect-adversarial-examples-for-speaker-verification-2211.00825</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lmd-a-learnable-mask-network-to-detect-adversarial-examples-for-speaker-verification-2211.00825"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lmd-a-learnable-mask-network-to-detect-adversarial-examples-for-speaker-verification-2211.00825"/></url>
<url><loc>https://scifaro.com/en/abs/neural-fourier-shift-for-binaural-speech-rendering-2211.00878</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-fourier-shift-for-binaural-speech-rendering-2211.00878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-fourier-shift-for-binaural-speech-rendering-2211.00878"/></url>
<url><loc>https://scifaro.com/en/abs/factorized-blank-thresholding-for-improved-runtime-efficiency-of-neural-transducers-2211.00896</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/factorized-blank-thresholding-for-improved-runtime-efficiency-of-neural-transducers-2211.00896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/factorized-blank-thresholding-for-improved-runtime-efficiency-of-neural-transducers-2211.00896"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-guitar-amplifier-modelling-with-unpaired-data-2211.00943</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-guitar-amplifier-modelling-with-unpaired-data-2211.00943"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-guitar-amplifier-modelling-with-unpaired-data-2211.00943"/></url>
<url><loc>https://scifaro.com/en/abs/monolingual-recognizers-fusion-for-code-switching-speech-recognition-2211.01046</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monolingual-recognizers-fusion-for-code-switching-speech-recognition-2211.01046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monolingual-recognizers-fusion-for-code-switching-speech-recognition-2211.01046"/></url>
<url><loc>https://scifaro.com/en/abs/i4u-system-description-for-nist-sre-20-cts-challenge-2211.01091</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i4u-system-description-for-nist-sre-20-cts-challenge-2211.01091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i4u-system-description-for-nist-sre-20-cts-challenge-2211.01091"/></url>
<url><loc>https://scifaro.com/en/abs/inference-and-denoise-causal-inference-based-neural-speech-enhancement-2211.01189</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inference-and-denoise-causal-inference-based-neural-speech-enhancement-2211.01189"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inference-and-denoise-causal-inference-based-neural-speech-enhancement-2211.01189"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-noisy-target-training-for-dnn-based-speech-enhancement-2211.01198</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-noisy-target-training-for-dnn-based-speech-enhancement-2211.01198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-noisy-target-training-for-dnn-based-speech-enhancement-2211.01198"/></url>
<url><loc>https://scifaro.com/en/abs/data2vec-aqc-search-for-the-right-teaching-assistant-in-the-teacher-student-training-setup-2211.01246</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data2vec-aqc-search-for-the-right-teaching-assistant-in-the-teacher-student-training-setup-2211.01246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data2vec-aqc-search-for-the-right-teaching-assistant-in-the-teacher-student-training-setup-2211.01246"/></url>
<url><loc>https://scifaro.com/en/abs/late-audio-visual-fusion-for-in-the-wild-speaker-diarization-2211.01299</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/late-audio-visual-fusion-for-in-the-wild-speaker-diarization-2211.01299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/late-audio-visual-fusion-for-in-the-wild-speaker-diarization-2211.01299"/></url>
<url><loc>https://scifaro.com/en/abs/technology-pipeline-for-large-scale-cross-lingual-dubbing-of-lecture-videos-into-multiple-indian-languages-2211.01338</loc><lastmod>2022-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/technology-pipeline-for-large-scale-cross-lingual-dubbing-of-lecture-videos-into-multiple-indian-languages-2211.01338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/technology-pipeline-for-large-scale-cross-lingual-dubbing-of-lecture-videos-into-multiple-indian-languages-2211.01338"/></url>
<url><loc>https://scifaro.com/en/abs/variable-attention-masking-for-configurable-transformer-transducer-speech-recognition-2211.01438</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variable-attention-masking-for-configurable-transformer-transducer-speech-recognition-2211.01438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variable-attention-masking-for-configurable-transformer-transducer-speech-recognition-2211.01438"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-segmentation-using-self-supervised-speech-models-2211.01461</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-segmentation-using-self-supervised-speech-models-2211.01461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-segmentation-using-self-supervised-speech-models-2211.01461"/></url>
<url><loc>https://scifaro.com/en/abs/mast-multiscale-audio-spectrogram-transformers-2211.01515</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mast-multiscale-audio-spectrogram-transformers-2211.01515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mast-multiscale-audio-spectrogram-transformers-2211.01515"/></url>
<url><loc>https://scifaro.com/en/abs/slicer-learning-universal-audio-representations-using-low-resource-self-supervised-pre-training-2211.01519</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slicer-learning-universal-audio-representations-using-low-resource-self-supervised-pre-training-2211.01519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slicer-learning-universal-audio-representations-using-low-resource-self-supervised-pre-training-2211.01519"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-assisted-multi-target-units-modeling-for-improving-conformer-transducer-asr-system-2211.01571</loc><lastmod>2023-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-assisted-multi-target-units-modeling-for-improving-conformer-transducer-asr-system-2211.01571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-assisted-multi-target-units-modeling-for-improving-conformer-transducer-asr-system-2211.01571"/></url>
<url><loc>https://scifaro.com/en/abs/convolution-channel-separation-and-frequency-sub-bands-aggregation-for-music-genre-classification-2211.01599</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolution-channel-separation-and-frequency-sub-bands-aggregation-for-music-genre-classification-2211.01599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolution-channel-separation-and-frequency-sub-bands-aggregation-for-music-genre-classification-2211.01599"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-domain-features-for-detecting-adversarial-attacks-against-deep-speech-recognition-in-noise-2211.01621</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-domain-features-for-detecting-adversarial-attacks-against-deep-speech-recognition-in-noise-2211.01621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-domain-features-for-detecting-adversarial-attacks-against-deep-speech-recognition-in-noise-2211.01621"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-data-augmentation-using-vae-gan-for-disordered-speech-recognition-2211.01646</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-data-augmentation-using-vae-gan-for-disordered-speech-recognition-2211.01646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-data-augmentation-using-vae-gan-for-disordered-speech-recognition-2211.01646"/></url>
<url><loc>https://scifaro.com/en/abs/channel-aware-pretraining-of-joint-encoder-decoder-self-supervised-model-for-telephonic-speech-asr-2211.01669</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-aware-pretraining-of-joint-encoder-decoder-self-supervised-model-for-telephonic-speech-asr-2211.01669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-aware-pretraining-of-joint-encoder-decoder-self-supervised-model-for-telephonic-speech-asr-2211.01669"/></url>
<url><loc>https://scifaro.com/en/abs/discussion-of-features-for-acoustic-anomaly-detection-under-industrial-disturbing-noise-in-an-end-of-line-test-of-geared-motors-2211.01716</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discussion-of-features-for-acoustic-anomaly-detection-under-industrial-disturbing-noise-in-an-end-of-line-test-of-geared-motors-2211.01716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discussion-of-features-for-acoustic-anomaly-detection-under-industrial-disturbing-noise-in-an-end-of-line-test-of-geared-motors-2211.01716"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-emotion-recognition-with-self-supervised-models-using-attentive-channel-wise-correlations-and-label-smoothing-2211.01756</loc><lastmod>2022-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-emotion-recognition-with-self-supervised-models-using-attentive-channel-wise-correlations-and-label-smoothing-2211.01756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-emotion-recognition-with-self-supervised-models-using-attentive-channel-wise-correlations-and-label-smoothing-2211.01756"/></url>
<url><loc>https://scifaro.com/en/abs/fearless-steps-challenge-phase-1-evaluation-plan-2211.02051</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fearless-steps-challenge-phase-1-evaluation-plan-2211.02051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fearless-steps-challenge-phase-1-evaluation-plan-2211.02051"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-audio-visual-speech-recognition-with-alignment-regularization-2211.02133</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-audio-visual-speech-recognition-with-alignment-regularization-2211.02133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-audio-visual-speech-recognition-with-alignment-regularization-2211.02133"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-parameter-efficient-tuning-for-general-purpose-audio-models-2211.02227</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-parameter-efficient-tuning-for-general-purpose-audio-models-2211.02227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-parameter-efficient-tuning-for-general-purpose-audio-models-2211.02227"/></url>
<url><loc>https://scifaro.com/en/abs/music-mixing-style-transfer-a-contrastive-learning-approach-to-disentangle-audio-effects-2211.02247</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-mixing-style-transfer-a-contrastive-learning-approach-to-disentangle-audio-effects-2211.02247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-mixing-style-transfer-a-contrastive-learning-approach-to-disentangle-audio-effects-2211.02247"/></url>
<url><loc>https://scifaro.com/en/abs/cochlscene-acquisition-of-acoustic-scene-data-using-crowdsourcing-2211.02289</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cochlscene-acquisition-of-acoustic-scene-data-using-crowdsourcing-2211.02289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cochlscene-acquisition-of-acoustic-scene-data-using-crowdsourcing-2211.02289"/></url>
<url><loc>https://scifaro.com/en/abs/minimum-latency-training-of-sequence-transducers-for-streaming-end-to-end-speech-recognition-2211.02333</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimum-latency-training-of-sequence-transducers-for-streaming-end-to-end-speech-recognition-2211.02333"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimum-latency-training-of-sequence-transducers-for-streaming-end-to-end-speech-recognition-2211.02333"/></url>
<url><loc>https://scifaro.com/en/abs/analysing-diffusion-based-generative-approaches-versus-discriminative-approaches-for-speech-restoration-2211.02397</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysing-diffusion-based-generative-approaches-versus-discriminative-approaches-for-speech-restoration-2211.02397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysing-diffusion-based-generative-approaches-versus-discriminative-approaches-for-speech-restoration-2211.02397"/></url>
<url><loc>https://scifaro.com/en/abs/spatially-selective-deep-non-linear-filters-for-speaker-extraction-2211.02420</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatially-selective-deep-non-linear-filters-for-speaker-extraction-2211.02420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatially-selective-deep-non-linear-filters-for-speaker-extraction-2211.02420"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-rate-offset-estimation-and-compensation-for-distributed-adaptive-node-specific-signal-estimation-in-wireless-acoustic-sensor-networks-2211.02489</loc><lastmod>2023-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-rate-offset-estimation-and-compensation-for-distributed-adaptive-node-specific-signal-estimation-in-wireless-acoustic-sensor-networks-2211.02489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-rate-offset-estimation-and-compensation-for-distributed-adaptive-node-specific-signal-estimation-in-wireless-acoustic-sensor-networks-2211.02489"/></url>
<url><loc>https://scifaro.com/en/abs/neural-feature-predictor-and-discriminative-residual-coding-for-low-bitrate-speech-coding-2211.02506</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-feature-predictor-and-discriminative-residual-coding-for-low-bitrate-speech-coding-2211.02506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-feature-predictor-and-discriminative-residual-coding-for-low-bitrate-speech-coding-2211.02506"/></url>
<url><loc>https://scifaro.com/en/abs/cold-diffusion-for-speech-enhancement-2211.02527</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cold-diffusion-for-speech-enhancement-2211.02527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cold-diffusion-for-speech-enhancement-2211.02527"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-through-synthesis-2211.02542</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-through-synthesis-2211.02542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-speech-enhancement-through-synthesis-2211.02542"/></url>
<url><loc>https://scifaro.com/en/abs/ccatmos-convolutional-context-aware-transformer-network-for-non-intrusive-speech-quality-assessment-2211.02577</loc><lastmod>2022-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ccatmos-convolutional-context-aware-transformer-network-for-non-intrusive-speech-quality-assessment-2211.02577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ccatmos-convolutional-context-aware-transformer-network-for-non-intrusive-speech-quality-assessment-2211.02577"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-ego-noise-references-with-a-microphone-array-embedded-in-an-unmanned-aerial-vehicle-2211.02690</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-ego-noise-references-with-a-microphone-array-embedded-in-an-unmanned-aerial-vehicle-2211.02690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-ego-noise-references-with-a-microphone-array-embedded-in-an-unmanned-aerial-vehicle-2211.02690"/></url>
<url><loc>https://scifaro.com/en/abs/samo-speaker-attractor-multi-center-one-class-learning-for-voice-anti-spoofing-2211.02718</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samo-speaker-attractor-multi-center-one-class-learning-for-voice-anti-spoofing-2211.02718"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samo-speaker-attractor-multi-center-one-class-learning-for-voice-anti-spoofing-2211.02718"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-joint-personalized-speech-enhancement-and-acoustic-echo-cancellation-2211.02773</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-joint-personalized-speech-enhancement-and-acoustic-echo-cancellation-2211.02773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-joint-personalized-speech-enhancement-and-acoustic-echo-cancellation-2211.02773"/></url>
<url><loc>https://scifaro.com/en/abs/breaking-the-trade-off-in-personalized-speech-enhancement-with-cross-task-knowledge-distillation-2211.02944</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breaking-the-trade-off-in-personalized-speech-enhancement-with-cross-task-knowledge-distillation-2211.02944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breaking-the-trade-off-in-personalized-speech-enhancement-with-cross-task-knowledge-distillation-2211.02944"/></url>
<url><loc>https://scifaro.com/en/abs/preserving-background-sound-in-noise-robust-voice-conversion-via-multi-task-learning-2211.03036</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preserving-background-sound-in-noise-robust-voice-conversion-via-multi-task-learning-2211.03036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preserving-background-sound-in-noise-robust-voice-conversion-via-multi-task-learning-2211.03036"/></url>
<url><loc>https://scifaro.com/en/abs/distinguishable-speaker-anonymization-based-on-formant-and-fundamental-frequency-scaling-2211.03038</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distinguishable-speaker-anonymization-based-on-formant-and-fundamental-frequency-scaling-2211.03038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distinguishable-speaker-anonymization-based-on-formant-and-fundamental-frequency-scaling-2211.03038"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-l2-accents-of-cross-lingual-text-to-speech-systems-via-vowel-space-2211.03078</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-l2-accents-of-cross-lingual-text-to-speech-systems-via-vowel-space-2211.03078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-l2-accents-of-cross-lingual-text-to-speech-systems-via-vowel-space-2211.03078"/></url>
<url><loc>https://scifaro.com/en/abs/a-context-aware-computational-approach-for-measuring-vocal-entrainment-in-dyadic-conversations-2211.03279</loc><lastmod>2022-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-context-aware-computational-approach-for-measuring-vocal-entrainment-in-dyadic-conversations-2211.03279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-context-aware-computational-approach-for-measuring-vocal-entrainment-in-dyadic-conversations-2211.03279"/></url>
<url><loc>https://scifaro.com/en/abs/peak-first-ctc-reducing-the-peak-latency-of-ctc-models-by-applying-peak-first-regularization-2211.03284</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peak-first-ctc-reducing-the-peak-latency-of-ctc-models-by-applying-peak-first-regularization-2211.03284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peak-first-ctc-reducing-the-peak-latency-of-ctc-models-by-applying-peak-first-regularization-2211.03284"/></url>
<url><loc>https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-a-conditional-variational-autoencoder-2211.03316</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-a-conditional-variational-autoencoder-2211.03316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-a-conditional-variational-autoencoder-2211.03316"/></url>
<url><loc>https://scifaro.com/en/abs/multi-blank-transducers-for-speech-recognition-2211.03541</loc><lastmod>2024-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-blank-transducers-for-speech-recognition-2211.03541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-blank-transducers-for-speech-recognition-2211.03541"/></url>
<url><loc>https://scifaro.com/en/abs/ernie-sat-speech-and-text-joint-pretraining-for-cross-lingual-multi-speaker-text-to-speech-2211.03545</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ernie-sat-speech-and-text-joint-pretraining-for-cross-lingual-multi-speaker-text-to-speech-2211.03545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ernie-sat-speech-and-text-joint-pretraining-for-cross-lingual-multi-speaker-text-to-speech-2211.03545"/></url>
<url><loc>https://scifaro.com/en/abs/on-negative-sampling-for-contrastive-audio-text-retrieval-2211.04070</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-negative-sampling-for-contrastive-audio-text-retrieval-2211.04070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-negative-sampling-for-contrastive-audio-text-retrieval-2211.04070"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-vocal-dereverberation-with-diffusion-based-generative-models-2211.04124</loc><lastmod>2022-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-vocal-dereverberation-with-diffusion-based-generative-models-2211.04124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-vocal-dereverberation-with-diffusion-based-generative-models-2211.04124"/></url>
<url><loc>https://scifaro.com/en/abs/pushing-the-limits-of-self-supervised-speaker-verification-using-regularized-distillation-framework-2211.04168</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pushing-the-limits-of-self-supervised-speaker-verification-using-regularized-distillation-framework-2211.04168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pushing-the-limits-of-self-supervised-speaker-verification-using-regularized-distillation-framework-2211.04168"/></url>
<url><loc>https://scifaro.com/en/abs/diffphase-generative-diffusion-based-stft-phase-retrieval-2211.04332</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffphase-generative-diffusion-based-stft-phase-retrieval-2211.04332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffphase-generative-diffusion-based-stft-phase-retrieval-2211.04332"/></url>
<url><loc>https://scifaro.com/en/abs/cross-attention-is-all-you-need-real-time-streaming-transformers-for-personalised-speech-enhancement-2211.04346</loc><lastmod>2022-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-attention-is-all-you-need-real-time-streaming-transformers-for-personalised-speech-enhancement-2211.04346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-attention-is-all-you-need-real-time-streaming-transformers-for-personalised-speech-enhancement-2211.04346"/></url>
<url><loc>https://scifaro.com/en/abs/phaseaug-a-differentiable-augmentation-for-speech-synthesis-to-simulate-one-to-many-mapping-2211.04610</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phaseaug-a-differentiable-augmentation-for-speech-synthesis-to-simulate-one-to-many-mapping-2211.04610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phaseaug-a-differentiable-augmentation-for-speech-synthesis-to-simulate-one-to-many-mapping-2211.04610"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-vc-highly-expressive-voice-conversion-with-attention-fusion-of-bottleneck-and-perturbation-features-2211.04710</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-vc-highly-expressive-voice-conversion-with-attention-fusion-of-bottleneck-and-perturbation-features-2211.04710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-vc-highly-expressive-voice-conversion-with-attention-fusion-of-bottleneck-and-perturbation-features-2211.04710"/></url>
<url><loc>https://scifaro.com/en/abs/absolute-decision-corrupts-absolutely-conservative-online-speaker-diarisation-2211.04768</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/absolute-decision-corrupts-absolutely-conservative-online-speaker-diarisation-2211.04768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/absolute-decision-corrupts-absolutely-conservative-online-speaker-diarisation-2211.04768"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-data-augmentation-techniques-for-deep-learning-based-emotion-recognition-2211.05047</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-data-augmentation-techniques-for-deep-learning-based-emotion-recognition-2211.05047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-data-augmentation-techniques-for-deep-learning-based-emotion-recognition-2211.05047"/></url>
<url><loc>https://scifaro.com/en/abs/a-diffeomorphic-flow-based-variational-framework-for-multi-speaker-emotion-conversion-2211.05071</loc><lastmod>2022-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-diffeomorphic-flow-based-variational-framework-for-multi-speaker-emotion-conversion-2211.05071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-diffeomorphic-flow-based-variational-framework-for-multi-speaker-emotion-conversion-2211.05071"/></url>
<url><loc>https://scifaro.com/en/abs/accidental-learners-spoken-language-identification-in-multilingual-self-supervised-models-2211.05103</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accidental-learners-spoken-language-identification-in-multilingual-self-supervised-models-2211.05103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accidental-learners-spoken-language-identification-in-multilingual-self-supervised-models-2211.05103"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-multi-corpora-language-model-training-for-speech-recognition-2211.05121</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-multi-corpora-language-model-training-for-speech-recognition-2211.05121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-multi-corpora-language-model-training-for-speech-recognition-2211.05121"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-with-large-scale-self-supervised-learning-2211.05172</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-with-large-scale-self-supervised-learning-2211.05172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-with-large-scale-self-supervised-learning-2211.05172"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-using-angular-contrastive-loss-2211.05442</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-using-angular-contrastive-loss-2211.05442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-of-audio-representations-using-angular-contrastive-loss-2211.05442"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-with-bi-label-masked-speech-prediction-for-streaming-multi-talker-speech-recognition-2211.05564</loc><lastmod>2022-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-with-bi-label-masked-speech-prediction-for-streaming-multi-talker-speech-recognition-2211.05564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-with-bi-label-masked-speech-prediction-for-streaming-multi-talker-speech-recognition-2211.05564"/></url>
<url><loc>https://scifaro.com/en/abs/remap-warp-and-attend-non-parallel-many-to-many-accent-conversion-with-normalizing-flows-2211.05850</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remap-warp-and-attend-non-parallel-many-to-many-accent-conversion-with-normalizing-flows-2211.05850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remap-warp-and-attend-non-parallel-many-to-many-accent-conversion-with-normalizing-flows-2211.05850"/></url>
<url><loc>https://scifaro.com/en/abs/an-adapter-based-multi-label-pre-training-for-speech-separation-and-enhancement-2211.06041</loc><lastmod>2022-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-adapter-based-multi-label-pre-training-for-speech-separation-and-enhancement-2211.06041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-adapter-based-multi-label-pre-training-for-speech-separation-and-enhancement-2211.06041"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-for-continuous-emotional-intensity-controllable-speech-synthesis-with-disentangled-representations-2211.06160</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-for-continuous-emotional-intensity-controllable-speech-synthesis-with-disentangled-representations-2211.06160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-for-continuous-emotional-intensity-controllable-speech-synthesis-with-disentangled-representations-2211.06160"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-and-adversarial-improve-asr-with-speaker-labels-2211.06369</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-and-adversarial-improve-asr-with-speaker-labels-2211.06369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-and-adversarial-improve-asr-with-speaker-labels-2211.06369"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-breath-sound-based-gender-classification-2211.06371</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-breath-sound-based-gender-classification-2211.06371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-breath-sound-based-gender-classification-2211.06371"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-sequence-to-sequence-transformer-transducer-models-for-keyword-spotting-2211.06478</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-sequence-to-sequence-transformer-transducer-models-for-keyword-spotting-2211.06478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-sequence-to-sequence-transformer-transducer-models-for-keyword-spotting-2211.06478"/></url>
<url><loc>https://scifaro.com/en/abs/augmenting-transformer-transducer-based-speaker-change-detection-with-token-level-training-loss-2211.06482</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmenting-transformer-transducer-based-speaker-change-detection-with-token-level-training-loss-2211.06482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmenting-transformer-transducer-based-speaker-change-detection-with-token-level-training-loss-2211.06482"/></url>
<url><loc>https://scifaro.com/en/abs/handling-trade-offs-in-speech-separation-with-sparsely-gated-mixture-of-experts-2211.06493</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/handling-trade-offs-in-speech-separation-with-sparsely-gated-mixture-of-experts-2211.06493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/handling-trade-offs-in-speech-separation-with-sparsely-gated-mixture-of-experts-2211.06493"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-one-shot-prosody-and-speaker-conversion-system-with-self-supervised-discrete-speech-units-2211.06535</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-one-shot-prosody-and-speaker-conversion-system-with-self-supervised-discrete-speech-units-2211.06535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-one-shot-prosody-and-speaker-conversion-system-with-self-supervised-discrete-speech-units-2211.06535"/></url>
<url><loc>https://scifaro.com/en/abs/investigations-in-audio-captioning-addressing-vocabulary-imbalance-and-evaluating-suitability-of-language-centric-performance-metrics-2211.06547</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigations-in-audio-captioning-addressing-vocabulary-imbalance-and-evaluating-suitability-of-language-centric-performance-metrics-2211.06547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigations-in-audio-captioning-addressing-vocabulary-imbalance-and-evaluating-suitability-of-language-centric-performance-metrics-2211.06547"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-speech-quality-assessment-using-self-supervised-framewise-embeddings-2211.06646</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-speech-quality-assessment-using-self-supervised-framewise-embeddings-2211.06646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-speech-quality-assessment-using-self-supervised-framewise-embeddings-2211.06646"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-and-wide-band-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2211.06750</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-and-wide-band-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2211.06750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-and-wide-band-simulated-conversations-as-training-data-for-end-to-end-neural-diarization-2211.06750"/></url>
<url><loc>https://scifaro.com/en/abs/overflow-putting-flows-on-top-of-neural-transducers-for-better-tts-2211.06892</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overflow-putting-flows-on-top-of-neural-transducers-for-better-tts-2211.06892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overflow-putting-flows-on-top-of-neural-transducers-for-better-tts-2211.06892"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-unified-conformer-structure-from-asr-to-asv-task-2211.07201</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-unified-conformer-structure-from-asr-to-asv-task-2211.07201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-unified-conformer-structure-from-asr-to-asv-task-2211.07201"/></url>
<url><loc>https://scifaro.com/en/abs/sniper-training-single-shot-sparse-training-for-text-to-speech-2211.07283</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sniper-training-single-shot-sparse-training-for-text-to-speech-2211.07283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sniper-training-single-shot-sparse-training-for-text-to-speech-2211.07283"/></url>
<url><loc>https://scifaro.com/en/abs/sentiment-recognition-of-italian-elderly-through-domain-adaptation-on-cross-corpus-speech-dataset-2211.07307</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sentiment-recognition-of-italian-elderly-through-domain-adaptation-on-cross-corpus-speech-dataset-2211.07307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sentiment-recognition-of-italian-elderly-through-domain-adaptation-on-cross-corpus-speech-dataset-2211.07307"/></url>
<url><loc>https://scifaro.com/en/abs/multi-label-training-for-text-independent-speaker-identification-2211.07373</loc><lastmod>2024-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-label-training-for-text-independent-speaker-identification-2211.07373"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-label-training-for-text-independent-speaker-identification-2211.07373"/></url>
<url><loc>https://scifaro.com/en/abs/the-far-side-of-failure-investigating-the-impact-of-speech-recognition-errors-on-subsequent-dementia-classification-2211.07430</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-far-side-of-failure-investigating-the-impact-of-speech-recognition-errors-on-subsequent-dementia-classification-2211.07430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-far-side-of-failure-investigating-the-impact-of-speech-recognition-errors-on-subsequent-dementia-classification-2211.07430"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-impact-of-noise-and-degradations-on-heart-sound-classification-models-2211.07445</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-impact-of-noise-and-degradations-on-heart-sound-classification-models-2211.07445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-impact-of-noise-and-degradations-on-heart-sound-classification-models-2211.07445"/></url>
<url><loc>https://scifaro.com/en/abs/the-potential-of-neural-speech-synthesis-based-data-augmentation-for-personalized-speech-enhancement-2211.07493</loc><lastmod>2022-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-potential-of-neural-speech-synthesis-based-data-augmentation-for-personalized-speech-enhancement-2211.07493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-potential-of-neural-speech-synthesis-based-data-augmentation-for-personalized-speech-enhancement-2211.07493"/></url>
<url><loc>https://scifaro.com/en/abs/on-unsupervised-uncertainty-driven-speech-pseudo-label-filtering-and-model-calibration-2211.07795</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-unsupervised-uncertainty-driven-speech-pseudo-label-filtering-and-model-calibration-2211.07795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-unsupervised-uncertainty-driven-speech-pseudo-label-filtering-and-model-calibration-2211.07795"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-the-combination-of-rehearsal-and-knowledge-distillation-in-continual-learning-for-spoken-language-understanding-2211.08161</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-the-combination-of-rehearsal-and-knowledge-distillation-in-continual-learning-for-spoken-language-understanding-2211.08161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-the-combination-of-rehearsal-and-knowledge-distillation-in-continual-learning-for-spoken-language-understanding-2211.08161"/></url>
<url><loc>https://scifaro.com/en/abs/improved-disentangled-speech-representations-using-contrastive-learning-in-factorized-hierarchical-variational-autoencoder-2211.08191</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-disentangled-speech-representations-using-contrastive-learning-in-factorized-hierarchical-variational-autoencoder-2211.08191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-disentangled-speech-representations-using-contrastive-learning-in-factorized-hierarchical-variational-autoencoder-2211.08191"/></url>
<url><loc>https://scifaro.com/en/abs/is-style-all-you-need-dependencies-between-emotion-and-gst-based-speaker-recognition-2211.08213</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-style-all-you-need-dependencies-between-emotion-and-gst-based-speaker-recognition-2211.08213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-style-all-you-need-dependencies-between-emotion-and-gst-based-speaker-recognition-2211.08213"/></url>
<url><loc>https://scifaro.com/en/abs/reverberation-as-supervision-for-speech-separation-2211.08303</loc><lastmod>2022-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberation-as-supervision-for-speech-separation-2211.08303"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberation-as-supervision-for-speech-separation-2211.08303"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-transformers-for-music-source-separation-2211.08553</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-transformers-for-music-source-separation-2211.08553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-transformers-for-music-source-separation-2211.08553"/></url>
<url><loc>https://scifaro.com/en/abs/array-configuration-agnostic-personalized-speech-enhancement-using-long-short-term-spatial-coherence-2211.08748</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/array-configuration-agnostic-personalized-speech-enhancement-using-long-short-term-spatial-coherence-2211.08748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/array-configuration-agnostic-personalized-speech-enhancement-using-long-short-term-spatial-coherence-2211.08748"/></url>
<url><loc>https://scifaro.com/en/abs/structural-segmentation-and-labeling-of-tabla-solo-performances-2211.08790</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structural-segmentation-and-labeling-of-tabla-solo-performances-2211.08790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structural-segmentation-and-labeling-of-tabla-solo-performances-2211.08790"/></url>
<url><loc>https://scifaro.com/en/abs/on-using-the-ua-speech-and-torgo-databases-to-validate-automatic-dysarthric-speech-classification-approaches-2211.08833</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-using-the-ua-speech-and-torgo-databases-to-validate-automatic-dysarthric-speech-classification-approaches-2211.08833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-using-the-ua-speech-and-torgo-databases-to-validate-automatic-dysarthric-speech-classification-approaches-2211.08833"/></url>
<url><loc>https://scifaro.com/en/abs/annotation-of-soft-onsets-in-string-ensemble-recordings-2211.08848</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/annotation-of-soft-onsets-in-string-ensemble-recordings-2211.08848"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/annotation-of-soft-onsets-in-string-ensemble-recordings-2211.08848"/></url>
<url><loc>https://scifaro.com/en/abs/l2-proficiency-assessment-using-self-supervised-speech-representations-2211.08849</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/l2-proficiency-assessment-using-self-supervised-speech-representations-2211.08849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/l2-proficiency-assessment-using-self-supervised-speech-representations-2211.08849"/></url>
<url><loc>https://scifaro.com/en/abs/delivering-speaking-style-in-low-resource-voice-conversion-with-multi-factor-constraints-2211.08857</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delivering-speaking-style-in-low-resource-voice-conversion-with-multi-factor-constraints-2211.08857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delivering-speaking-style-in-low-resource-voice-conversion-with-multi-factor-constraints-2211.08857"/></url>
<url><loc>https://scifaro.com/en/abs/mcnet-fuse-multiple-cues-for-multichannel-speech-enhancement-2211.08872</loc><lastmod>2022-11-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mcnet-fuse-multiple-cues-for-multichannel-speech-enhancement-2211.08872"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mcnet-fuse-multiple-cues-for-multichannel-speech-enhancement-2211.08872"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-deep-representation-learning-based-speech-enhancement-method-using-variational-autoencoder-and-adversarial-training-2211.09166</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-deep-representation-learning-based-speech-enhancement-method-using-variational-autoencoder-and-adversarial-training-2211.09166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-deep-representation-learning-based-speech-enhancement-method-using-variational-autoencoder-and-adversarial-training-2211.09166"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-model-based-speaker-adaptation-of-end-to-end-lattice-free-mmi-model-for-speech-recognition-2211.09313</loc><lastmod>2023-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-model-based-speaker-adaptation-of-end-to-end-lattice-free-mmi-model-for-speech-recognition-2211.09313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-model-based-speaker-adaptation-of-end-to-end-lattice-free-mmi-model-for-speech-recognition-2211.09313"/></url>
<url><loc>https://scifaro.com/en/abs/spectnet-end-to-end-audio-signal-classification-using-learnable-spectrograms-2211.09352</loc><lastmod>2022-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectnet-end-to-end-audio-signal-classification-using-learnable-spectrograms-2211.09352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectnet-end-to-end-audio-signal-classification-using-learnable-spectrograms-2211.09352"/></url>
<url><loc>https://scifaro.com/en/abs/grad-stylespeech-any-speaker-adaptive-text-to-speech-synthesis-with-diffusion-models-2211.09383</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/grad-stylespeech-any-speaker-adaptive-text-to-speech-synthesis-with-diffusion-models-2211.09383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/grad-stylespeech-any-speaker-adaptive-text-to-speech-synthesis-with-diffusion-models-2211.09383"/></url>
<url><loc>https://scifaro.com/en/abs/emodiff-intensity-controllable-emotional-text-to-speech-with-soft-label-guidance-2211.09496</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emodiff-intensity-controllable-emotional-text-to-speech-with-soft-label-guidance-2211.09496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emodiff-intensity-controllable-emotional-text-to-speech-with-soft-label-guidance-2211.09496"/></url>
<url><loc>https://scifaro.com/en/abs/a-persian-asr-based-ser-modification-of-sharif-emotional-speech-database-and-investigation-of-persian-text-corpora-2211.09956</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-persian-asr-based-ser-modification-of-sharif-emotional-speech-database-and-investigation-of-persian-text-corpora-2211.09956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-persian-asr-based-ser-modification-of-sharif-emotional-speech-database-and-investigation-of-persian-text-corpora-2211.09956"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-wavlm-on-speech-enhancement-2211.09988</loc><lastmod>2022-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-wavlm-on-speech-enhancement-2211.09988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-wavlm-on-speech-enhancement-2211.09988"/></url>
<url><loc>https://scifaro.com/en/abs/self-transriber-few-shot-lyrics-transcription-with-self-training-2211.10152</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-transriber-few-shot-lyrics-transcription-with-self-training-2211.10152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-transriber-few-shot-lyrics-transcription-with-self-training-2211.10152"/></url>
<url><loc>https://scifaro.com/en/abs/self-remixing-unsupervised-speech-separation-via-separation-and-remixing-2211.10194</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-remixing-unsupervised-speech-separation-via-separation-and-remixing-2211.10194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-remixing-unsupervised-speech-separation-via-separation-and-remixing-2211.10194"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-visual-assistance-for-automated-audio-captioning-2211.10539</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-visual-assistance-for-automated-audio-captioning-2211.10539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-visual-assistance-for-automated-audio-captioning-2211.10539"/></url>
<url><loc>https://scifaro.com/en/abs/filterbank-learning-for-noise-robust-small-footprint-keyword-spotting-2211.10565</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filterbank-learning-for-noise-robust-small-footprint-keyword-spotting-2211.10565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filterbank-learning-for-noise-robust-small-footprint-keyword-spotting-2211.10565"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-expressive-speech-synthesis-via-multiple-factors-decoupling-2211.10568</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-expressive-speech-synthesis-via-multiple-factors-decoupling-2211.10568"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-expressive-speech-synthesis-via-multiple-factors-decoupling-2211.10568"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneously-learning-robust-audio-embeddings-and-balanced-hash-codes-for-query-by-example-2211.11060</loc><lastmod>2023-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneously-learning-robust-audio-embeddings-and-balanced-hash-codes-for-query-by-example-2211.11060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneously-learning-robust-audio-embeddings-and-balanced-hash-codes-for-query-by-example-2211.11060"/></url>
<url><loc>https://scifaro.com/en/abs/embedding-a-differentiable-mel-cepstral-synthesis-filter-to-a-neural-speech-synthesis-system-2211.11222</loc><lastmod>2022-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/embedding-a-differentiable-mel-cepstral-synthesis-filter-to-a-neural-speech-synthesis-system-2211.11222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/embedding-a-differentiable-mel-cepstral-synthesis-filter-to-a-neural-speech-synthesis-system-2211.11222"/></url>
<url><loc>https://scifaro.com/en/abs/vatlm-visual-audio-text-pre-training-with-unified-masked-prediction-for-speech-representation-learning-2211.11275</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vatlm-visual-audio-text-pre-training-with-unified-masked-prediction-for-speech-representation-learning-2211.11275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vatlm-visual-audio-text-pre-training-with-unified-masked-prediction-for-speech-representation-learning-2211.11275"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-acoustic-compensation-and-adaptive-focal-training-for-personalized-speech-enhancement-2211.12097</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-acoustic-compensation-and-adaptive-focal-training-for-personalized-speech-enhancement-2211.12097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-acoustic-compensation-and-adaptive-focal-training-for-personalized-speech-enhancement-2211.12097"/></url>
<url><loc>https://scifaro.com/en/abs/prompttts-controllable-text-to-speech-with-text-descriptions-2211.12171</loc><lastmod>2022-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompttts-controllable-text-to-speech-with-text-descriptions-2211.12171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompttts-controllable-text-to-speech-with-text-descriptions-2211.12171"/></url>
<url><loc>https://scifaro.com/en/abs/ontology-aware-learning-and-evaluation-for-audio-tagging-2211.12195</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ontology-aware-learning-and-evaluation-for-audio-tagging-2211.12195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ontology-aware-learning-and-evaluation-for-audio-tagging-2211.12195"/></url>
<url><loc>https://scifaro.com/en/abs/deep-neural-mel-subband-beamformer-for-in-car-speech-separation-2211.12590</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-neural-mel-subband-beamformer-for-in-car-speech-separation-2211.12590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-neural-mel-subband-beamformer-for-in-car-speech-separation-2211.12590"/></url>
<url><loc>https://scifaro.com/en/abs/skipconvgan-monaural-speech-dereverberation-using-generative-adversarial-networks-via-complex-time-frequency-masking-2211.12623</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skipconvgan-monaural-speech-dereverberation-using-generative-adversarial-networks-via-complex-time-frequency-masking-2211.12623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skipconvgan-monaural-speech-dereverberation-using-generative-adversarial-networks-via-complex-time-frequency-masking-2211.12623"/></url>
<url><loc>https://scifaro.com/en/abs/complex-valued-time-frequency-self-attention-for-speech-dereverberation-2211.12632</loc><lastmod>2022-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-valued-time-frequency-self-attention-for-speech-dereverberation-2211.12632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-valued-time-frequency-self-attention-for-speech-dereverberation-2211.12632"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-speech-feature-fusion-method-with-cross-gate-parallel-cnn-for-speaker-recognition-2211.13377</loc><lastmod>2022-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-speech-feature-fusion-method-with-cross-gate-parallel-cnn-for-speaker-recognition-2211.13377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-speech-feature-fusion-method-with-cross-gate-parallel-cnn-for-speaker-recognition-2211.13377"/></url>
<url><loc>https://scifaro.com/en/abs/prosody-controllable-spontaneous-tts-with-neural-hmms-2211.13533</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prosody-controllable-spontaneous-tts-with-neural-hmms-2211.13533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prosody-controllable-spontaneous-tts-with-neural-hmms-2211.13533"/></url>
<url><loc>https://scifaro.com/en/abs/interpretability-analysis-of-deep-models-for-covid-19-detection-2211.14372</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpretability-analysis-of-deep-models-for-covid-19-detection-2211.14372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpretability-analysis-of-deep-models-for-covid-19-detection-2211.14372"/></url>
<url><loc>https://scifaro.com/en/abs/stereo-speech-enhancement-using-custom-mid-side-signals-and-monaural-processing-2211.14378</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stereo-speech-enhancement-using-custom-mid-side-signals-and-monaural-processing-2211.14378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stereo-speech-enhancement-using-custom-mid-side-signals-and-monaural-processing-2211.14378"/></url>
<url><loc>https://scifaro.com/en/abs/contextual-expressive-text-to-speech-2211.14548</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextual-expressive-text-to-speech-2211.14548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextual-expressive-text-to-speech-2211.14548"/></url>
<url><loc>https://scifaro.com/en/abs/inter-kd-intermediate-knowledge-distillation-for-ctc-based-automatic-speech-recognition-2211.15075</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inter-kd-intermediate-knowledge-distillation-for-ctc-based-automatic-speech-recognition-2211.15075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inter-kd-intermediate-knowledge-distillation-for-ctc-based-automatic-speech-recognition-2211.15075"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-transcription-of-drum-strokes-in-carnatic-music-2211.15185</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-transcription-of-drum-strokes-in-carnatic-music-2211.15185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-transcription-of-drum-strokes-in-carnatic-music-2211.15185"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-front-ends-based-on-temporal-modulation-for-music-tagging-2211.15254</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-front-ends-based-on-temporal-modulation-for-music-tagging-2211.15254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-front-ends-based-on-temporal-modulation-for-music-tagging-2211.15254"/></url>
<url><loc>https://scifaro.com/en/abs/whose-emotion-matters-speaking-activity-localisation-without-prior-knowledge-2211.15377</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whose-emotion-matters-speaking-activity-localisation-without-prior-knowledge-2211.15377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whose-emotion-matters-speaking-activity-localisation-without-prior-knowledge-2211.15377"/></url>
<url><loc>https://scifaro.com/en/abs/probabilistic-modelling-of-signal-mixtures-with-differentiable-dictionaries-2211.15439</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probabilistic-modelling-of-signal-mixtures-with-differentiable-dictionaries-2211.15439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probabilistic-modelling-of-signal-mixtures-with-differentiable-dictionaries-2211.15439"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-dictionary-search-integrating-linear-mixing-with-deep-non-linear-modelling-for-audio-source-separation-2211.15524</loc><lastmod>2022-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-dictionary-search-integrating-linear-mixing-with-deep-non-linear-modelling-for-audio-source-separation-2211.15524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-dictionary-search-integrating-linear-mixing-with-deep-non-linear-modelling-for-audio-source-separation-2211.15524"/></url>
<url><loc>https://scifaro.com/en/abs/jacappella-corpus-a-japanese-a-cappella-vocal-ensemble-corpus-2211.16028</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jacappella-corpus-a-japanese-a-cappella-vocal-ensemble-corpus-2211.16028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jacappella-corpus-a-japanese-a-cappella-vocal-ensemble-corpus-2211.16028"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-and-reducing-the-distance-between-synthetic-and-real-speech-distributions-2211.16049</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-and-reducing-the-distance-between-synthetic-and-real-speech-distributions-2211.16049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-and-reducing-the-distance-between-synthetic-and-real-speech-distributions-2211.16049"/></url>
<url><loc>https://scifaro.com/en/abs/hiding-speaker-s-sex-in-speech-using-zero-evidence-speaker-representation-in-an-analysis-synthesis-pipeline-2211.16065</loc><lastmod>2023-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hiding-speaker-s-sex-in-speech-using-zero-evidence-speaker-representation-in-an-analysis-synthesis-pipeline-2211.16065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hiding-speaker-s-sex-in-speech-using-zero-evidence-speaker-representation-in-an-analysis-synthesis-pipeline-2211.16065"/></url>
<url><loc>https://scifaro.com/en/abs/on-word-error-rate-definitions-and-their-efficient-computation-for-multi-speaker-speech-recognition-systems-2211.16112</loc><lastmod>2023-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-word-error-rate-definitions-and-their-efficient-computation-for-multi-speaker-speech-recognition-systems-2211.16112"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-word-error-rate-definitions-and-their-efficient-computation-for-multi-speaker-speech-recognition-systems-2211.16112"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-evaluation-metrics-for-code-switching-automatic-speech-recognition-2211.16319</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-evaluation-metrics-for-code-switching-automatic-speech-recognition-2211.16319"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-evaluation-metrics-for-code-switching-automatic-speech-recognition-2211.16319"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-constant-q-filterbank-based-representations-for-speech-emotion-recognition-2211.16363</loc><lastmod>2022-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-constant-q-filterbank-based-representations-for-speech-emotion-recognition-2211.16363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-constant-q-filterbank-based-representations-for-speech-emotion-recognition-2211.16363"/></url>
<url><loc>https://scifaro.com/en/abs/msv-challenge-2022-npu-hc-speaker-verification-system-for-low-resource-indian-languages-2211.16694</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msv-challenge-2022-npu-hc-speaker-verification-system-for-low-resource-indian-languages-2211.16694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msv-challenge-2022-npu-hc-speaker-verification-system-for-low-resource-indian-languages-2211.16694"/></url>
<url><loc>https://scifaro.com/en/abs/snac-speaker-normalized-affine-coupling-layer-in-flow-based-architecture-for-zero-shot-multi-speaker-text-to-speech-2211.16866</loc><lastmod>2022-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snac-speaker-normalized-affine-coupling-layer-in-flow-based-architecture-for-zero-shot-multi-speaker-text-to-speech-2211.16866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snac-speaker-normalized-affine-coupling-layer-in-flow-based-architecture-for-zero-shot-multi-speaker-text-to-speech-2211.16866"/></url>
<url><loc>https://scifaro.com/en/abs/extreme-audio-time-stretching-using-neural-synthesis-2211.16992</loc><lastmod>2022-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extreme-audio-time-stretching-using-neural-synthesis-2211.16992"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extreme-audio-time-stretching-using-neural-synthesis-2211.16992"/></url>
<url><loc>https://scifaro.com/en/abs/better-transcription-of-uk-supreme-court-hearings-2211.17094</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/better-transcription-of-uk-supreme-court-hearings-2211.17094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/better-transcription-of-uk-supreme-court-hearings-2211.17094"/></url>
<url><loc>https://scifaro.com/en/abs/assisted-rtf-vector-based-binaural-direction-of-arrival-estimation-exploiting-a-calibrated-external-microphone-array-2211.17202</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assisted-rtf-vector-based-binaural-direction-of-arrival-estimation-exploiting-a-calibrated-external-microphone-array-2211.17202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assisted-rtf-vector-based-binaural-direction-of-arrival-estimation-exploiting-a-calibrated-external-microphone-array-2211.17202"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-speech-feature-fusion-algorithm-for-text-independent-speaker-recognition-2212.00329</loc><lastmod>2022-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-speech-feature-fusion-algorithm-for-text-independent-speaker-recognition-2212.00329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-speech-feature-fusion-algorithm-for-text-independent-speaker-recognition-2212.00329"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-speech-enhancement-with-band-split-rnn-2212.00406</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-speech-enhancement-with-band-split-rnn-2212.00406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-speech-enhancement-with-band-split-rnn-2212.00406"/></url>
<url><loc>https://scifaro.com/en/abs/injecting-spatial-information-for-monaural-speech-enhancement-via-knowledge-distillation-2212.01012</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/injecting-spatial-information-for-monaural-speech-enhancement-via-knowledge-distillation-2212.01012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/injecting-spatial-information-for-monaural-speech-enhancement-via-knowledge-distillation-2212.01012"/></url>
<url><loc>https://scifaro.com/en/abs/exarn-self-attending-rnn-for-target-speaker-extraction-2212.01106</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exarn-self-attending-rnn-for-target-speaker-extraction-2212.01106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exarn-self-attending-rnn-for-target-speaker-extraction-2212.01106"/></url>
<url><loc>https://scifaro.com/en/abs/preliminary-study-on-sscf-derived-polar-coordinate-for-asr-2212.01245</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preliminary-study-on-sscf-derived-polar-coordinate-for-asr-2212.01245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preliminary-study-on-sscf-derived-polar-coordinate-for-asr-2212.01245"/></url>
<url><loc>https://scifaro.com/en/abs/chapter-exploiting-convolutional-neural-network-adapters-for-self-supervised-speech-models-2212.01282</loc><lastmod>2023-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chapter-exploiting-convolutional-neural-network-adapters-for-self-supervised-speech-models-2212.01282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chapter-exploiting-convolutional-neural-network-adapters-for-self-supervised-speech-models-2212.01282"/></url>
<url><loc>https://scifaro.com/en/abs/relative-acoustic-features-for-distance-estimation-in-smart-homes-2212.01306</loc><lastmod>2022-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relative-acoustic-features-for-distance-estimation-in-smart-homes-2212.01306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relative-acoustic-features-for-distance-estimation-in-smart-homes-2212.01306"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-on-device-speech-recognition-using-disentangled-conformers-2212.01393</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-on-device-speech-recognition-using-disentangled-conformers-2212.01393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-on-device-speech-recognition-using-disentangled-conformers-2212.01393"/></url>
<url><loc>https://scifaro.com/en/abs/investigations-on-the-influence-of-combined-inter-aural-cue-distortions-on-overall-audio-quality-2212.01427</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigations-on-the-influence-of-combined-inter-aural-cue-distortions-on-overall-audio-quality-2212.01427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigations-on-the-influence-of-combined-inter-aural-cue-distortions-on-overall-audio-quality-2212.01427"/></url>
<url><loc>https://scifaro.com/en/abs/objective-assessment-of-spatial-audio-quality-using-directional-loudness-maps-2212.01451</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/objective-assessment-of-spatial-audio-quality-using-directional-loudness-maps-2212.01451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/objective-assessment-of-spatial-audio-quality-using-directional-loudness-maps-2212.01451"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-still-use-peaq-a-performance-analysis-of-the-itu-standard-for-the-objective-assessment-of-perceived-audio-quality-2212.01467</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-still-use-peaq-a-performance-analysis-of-the-itu-standard-for-the-objective-assessment-of-perceived-audio-quality-2212.01467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-still-use-peaq-a-performance-analysis-of-the-itu-standard-for-the-objective-assessment-of-perceived-audio-quality-2212.01467"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-fine-tuning-data-selection-for-asr-using-self-supervised-speech-models-2212.01661</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-fine-tuning-data-selection-for-asr-using-self-supervised-speech-models-2212.01661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-fine-tuning-data-selection-for-asr-using-self-supervised-speech-models-2212.01661"/></url>
<url><loc>https://scifaro.com/en/abs/improving-end-to-end-speech-translation-by-leveraging-auxiliary-speech-and-text-data-2212.01778</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-end-to-end-speech-translation-by-leveraging-auxiliary-speech-and-text-data-2212.01778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-end-to-end-speech-translation-by-leveraging-auxiliary-speech-and-text-data-2212.01778"/></url>
<url><loc>https://scifaro.com/en/abs/tragic-talkers-a-shakespearean-sound-and-light-field-dataset-for-audio-visual-machine-learning-research-2212.01892</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tragic-talkers-a-shakespearean-sound-and-light-field-dataset-for-audio-visual-machine-learning-research-2212.01892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tragic-talkers-a-shakespearean-sound-and-light-field-dataset-for-audio-visual-machine-learning-research-2212.01892"/></url>
<url><loc>https://scifaro.com/en/abs/evince-the-artifacts-of-spoof-speech-by-blending-vocal-tract-and-voice-source-features-2212.02013</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evince-the-artifacts-of-spoof-speech-by-blending-vocal-tract-and-voice-source-features-2212.02013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evince-the-artifacts-of-spoof-speech-by-blending-vocal-tract-and-voice-source-features-2212.02013"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generating-diverse-audio-captions-via-adversarial-training-2212.02033</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generating-diverse-audio-captions-via-adversarial-training-2212.02033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generating-diverse-audio-captions-via-adversarial-training-2212.02033"/></url>
<url><loc>https://scifaro.com/en/abs/lmec-learnable-multiplicative-absolute-position-embedding-based-conformer-for-speech-recognition-2212.02099</loc><lastmod>2022-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lmec-learnable-multiplicative-absolute-position-embedding-based-conformer-for-speech-recognition-2212.02099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lmec-learnable-multiplicative-absolute-position-embedding-based-conformer-for-speech-recognition-2212.02099"/></url>
<url><loc>https://scifaro.com/en/abs/sound-emergence-as-a-predictor-of-short-term-annoyance-from-wind-turbine-noise-2212.02616</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-emergence-as-a-predictor-of-short-term-annoyance-from-wind-turbine-noise-2212.02616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-emergence-as-a-predictor-of-short-term-annoyance-from-wind-turbine-noise-2212.02616"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-audio-visual-speech-representations-learning-by-multimodal-self-distillation-2212.02782</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-audio-visual-speech-representations-learning-by-multimodal-self-distillation-2212.02782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-audio-visual-speech-representations-learning-by-multimodal-self-distillation-2212.02782"/></url>
<url><loc>https://scifaro.com/en/abs/bc-vad-a-robust-bone-conduction-voice-activity-detection-2212.02996</loc><lastmod>2022-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bc-vad-a-robust-bone-conduction-voice-activity-detection-2212.02996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bc-vad-a-robust-bone-conduction-voice-activity-detection-2212.02996"/></url>
<url><loc>https://scifaro.com/en/abs/fretnet-continuous-valued-pitch-contour-streaming-for-polyphonic-guitar-tablature-transcription-2212.03023</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fretnet-continuous-valued-pitch-contour-streaming-for-polyphonic-guitar-tablature-transcription-2212.03023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fretnet-continuous-valued-pitch-contour-streaming-for-polyphonic-guitar-tablature-transcription-2212.03023"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-utilization-of-entrainment-on-acoustic-and-emotion-features-in-user-agent-dialogue-2212.03398</loc><lastmod>2022-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-utilization-of-entrainment-on-acoustic-and-emotion-features-in-user-agent-dialogue-2212.03398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-utilization-of-entrainment-on-acoustic-and-emotion-features-in-user-agent-dialogue-2212.03398"/></url>
<url><loc>https://scifaro.com/en/abs/mimo-dbnet-multi-channel-input-and-multiple-outputs-doa-aware-beamforming-network-for-speech-separation-2212.03401</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimo-dbnet-multi-channel-input-and-multiple-outputs-doa-aware-beamforming-network-for-speech-separation-2212.03401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimo-dbnet-multi-channel-input-and-multiple-outputs-doa-aware-beamforming-network-for-speech-separation-2212.03401"/></url>
<url><loc>https://scifaro.com/en/abs/selector-enhancer-learning-dynamic-selection-of-local-and-non-local-attention-operation-for-speech-enhancement-2212.03408</loc><lastmod>2023-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selector-enhancer-learning-dynamic-selection-of-local-and-non-local-attention-operation-for-speech-enhancement-2212.03408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selector-enhancer-learning-dynamic-selection-of-local-and-non-local-attention-operation-for-speech-enhancement-2212.03408"/></url>
<url><loc>https://scifaro.com/en/abs/improving-trajectory-localization-accuracy-via-direction-of-arrival-derivative-estimation-2212.03470</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-trajectory-localization-accuracy-via-direction-of-arrival-derivative-estimation-2212.03470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-trajectory-localization-accuracy-via-direction-of-arrival-derivative-estimation-2212.03470"/></url>
<url><loc>https://scifaro.com/en/abs/improved-self-supervised-multilingual-speech-representation-learning-combined-with-auxiliary-language-information-2212.03476</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-self-supervised-multilingual-speech-representation-learning-combined-with-auxiliary-language-information-2212.03476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-self-supervised-multilingual-speech-representation-learning-combined-with-auxiliary-language-information-2212.03476"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-multi-scale-self-supervised-learning-for-speech-recognition-2212.03480</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-multi-scale-self-supervised-learning-for-speech-recognition-2212.03480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-multi-scale-self-supervised-learning-for-speech-recognition-2212.03480"/></url>
<url><loc>https://scifaro.com/en/abs/improved-speech-pre-training-with-supervision-enhanced-acoustic-unit-2212.03482</loc><lastmod>2022-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-speech-pre-training-with-supervision-enhanced-acoustic-unit-2212.03482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-speech-pre-training-with-supervision-enhanced-acoustic-unit-2212.03482"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relevance-of-the-differences-between-hrtf-measurement-setups-for-machine-learning-2212.04283</loc><lastmod>2022-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relevance-of-the-differences-between-hrtf-measurement-setups-for-machine-learning-2212.04283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relevance-of-the-differences-between-hrtf-measurement-setups-for-machine-learning-2212.04283"/></url>
<url><loc>https://scifaro.com/en/abs/lattice-free-sequence-discriminative-training-for-phoneme-based-neural-transducers-2212.04325</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lattice-free-sequence-discriminative-training-for-phoneme-based-neural-transducers-2212.04325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lattice-free-sequence-discriminative-training-for-phoneme-based-neural-transducers-2212.04325"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speech-recognition-via-large-scale-weak-supervision-2212.04356</loc><lastmod>2022-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speech-recognition-via-large-scale-weak-supervision-2212.04356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speech-recognition-via-large-scale-weak-supervision-2212.04356"/></url>
<url><loc>https://scifaro.com/en/abs/dred-deep-redundancy-coding-of-speech-using-a-rate-distortion-optimized-variational-autoencoder-2212.04453</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dred-deep-redundancy-coding-of-speech-using-a-rate-distortion-optimized-variational-autoencoder-2212.04453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dred-deep-redundancy-coding-of-speech-using-a-rate-distortion-optimized-variational-autoencoder-2212.04453"/></url>
<url><loc>https://scifaro.com/en/abs/framewise-wavegan-high-speed-adversarial-vocoder-in-time-domain-with-very-low-computational-complexity-2212.04532</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/framewise-wavegan-high-speed-adversarial-vocoder-in-time-domain-with-very-low-computational-complexity-2212.04532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/framewise-wavegan-high-speed-adversarial-vocoder-in-time-domain-with-very-low-computational-complexity-2212.04532"/></url>
<url><loc>https://scifaro.com/en/abs/speechlmscore-evaluating-speech-generation-using-speech-language-model-2212.04559</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechlmscore-evaluating-speech-generation-using-speech-language-model-2212.04559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechlmscore-evaluating-speech-generation-using-speech-language-model-2212.04559"/></url>
<url><loc>https://scifaro.com/en/abs/a-data-driven-cognitive-salience-model-for-objective-perceptual-audio-quality-assessment-2212.04572</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-data-driven-cognitive-salience-model-for-objective-perceptual-audio-quality-assessment-2212.04572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-data-driven-cognitive-salience-model-for-objective-perceptual-audio-quality-assessment-2212.04572"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-audio-coding-with-mdctnet-2212.04583</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-audio-coding-with-mdctnet-2212.04583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-audio-coding-with-mdctnet-2212.04583"/></url>
<url><loc>https://scifaro.com/en/abs/geometry-aware-doa-estimation-using-a-deep-neural-network-with-mixed-data-input-features-2212.04788</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/geometry-aware-doa-estimation-using-a-deep-neural-network-with-mixed-data-input-features-2212.04788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/geometry-aware-doa-estimation-using-a-deep-neural-network-with-mixed-data-input-features-2212.04788"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-estimation-in-deep-speech-enhancement-using-complex-gaussian-mixture-models-2212.04831</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-estimation-in-deep-speech-enhancement-using-complex-gaussian-mixture-models-2212.04831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-estimation-in-deep-speech-enhancement-using-complex-gaussian-mixture-models-2212.04831"/></url>
<url><loc>https://scifaro.com/en/abs/ddsupport-language-learning-support-system-that-displays-differences-and-distances-from-model-speech-2212.04930</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddsupport-language-learning-support-system-that-displays-differences-and-distances-from-model-speech-2212.04930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddsupport-language-learning-support-system-that-displays-differences-and-distances-from-model-speech-2212.04930"/></url>
<url><loc>https://scifaro.com/en/abs/hyperbolic-audio-source-separation-2212.05008</loc><lastmod>2022-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperbolic-audio-source-separation-2212.05008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperbolic-audio-source-separation-2212.05008"/></url>
<url><loc>https://scifaro.com/en/abs/gpu-accelerated-guided-source-separation-for-meeting-transcription-2212.05271</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gpu-accelerated-guided-source-separation-for-meeting-transcription-2212.05271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gpu-accelerated-guided-source-separation-for-meeting-transcription-2212.05271"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-wave-geometric-impulse-responses-for-improved-speech-dereverberation-2212.05360</loc><lastmod>2022-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-wave-geometric-impulse-responses-for-improved-speech-dereverberation-2212.05360"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-wave-geometric-impulse-responses-for-improved-speech-dereverberation-2212.05360"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-accent-conversion-using-pseudo-siamese-disentanglement-network-2212.05751</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-accent-conversion-using-pseudo-siamese-disentanglement-network-2212.05751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-accent-conversion-using-pseudo-siamese-disentanglement-network-2212.05751"/></url>
<url><loc>https://scifaro.com/en/abs/dopplerbas-binaural-audio-synthesis-addressing-doppler-effect-2212.07000</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dopplerbas-binaural-audio-synthesis-addressing-doppler-effect-2212.07000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dopplerbas-binaural-audio-synthesis-addressing-doppler-effect-2212.07000"/></url>
<url><loc>https://scifaro.com/en/abs/probing-deep-speaker-embeddings-for-speaker-related-tasks-2212.07068</loc><lastmod>2022-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-deep-speaker-embeddings-for-speaker-related-tasks-2212.07068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-deep-speaker-embeddings-for-speaker-related-tasks-2212.07068"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-the-cocktail-fork-problem-for-separation-and-transcription-of-real-world-soundtracks-2212.07327</loc><lastmod>2022-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-the-cocktail-fork-problem-for-separation-and-transcription-of-real-world-soundtracks-2212.07327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-the-cocktail-fork-problem-for-separation-and-transcription-of-real-world-soundtracks-2212.07327"/></url>
<url><loc>https://scifaro.com/en/abs/deft-an-dense-frequency-time-attentive-network-for-multichannel-speech-enhancement-2212.07570</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deft-an-dense-frequency-time-attentive-network-for-multichannel-speech-enhancement-2212.07570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deft-an-dense-frequency-time-attentive-network-for-multichannel-speech-enhancement-2212.07570"/></url>
<url><loc>https://scifaro.com/en/abs/improving-fast-slow-encoder-based-transducer-with-streaming-deliberation-2212.07650</loc><lastmod>2022-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-fast-slow-encoder-based-transducer-with-streaming-deliberation-2212.07650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-fast-slow-encoder-based-transducer-with-streaming-deliberation-2212.07650"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-japanese-png-bert-language-model-in-text-to-speech-synthesis-for-pitch-accent-language-2212.08321</loc><lastmod>2022-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-japanese-png-bert-language-model-in-text-to-speech-synthesis-for-pitch-accent-language-2212.08321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-japanese-png-bert-language-model-in-text-to-speech-synthesis-for-pitch-accent-language-2212.08321"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-synthesis-based-on-latent-variable-conversion-using-diffusion-probabilistic-model-and-variational-autoencoder-2212.08329</loc><lastmod>2022-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-synthesis-based-on-latent-variable-conversion-using-diffusion-probabilistic-model-and-variational-autoencoder-2212.08329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-synthesis-based-on-latent-variable-conversion-using-diffusion-probabilistic-model-and-variational-autoencoder-2212.08329"/></url>
<url><loc>https://scifaro.com/en/abs/context-aware-fine-tuning-of-self-supervised-speech-models-2212.08542</loc><lastmod>2023-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/context-aware-fine-tuning-of-self-supervised-speech-models-2212.08542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/context-aware-fine-tuning-of-self-supervised-speech-models-2212.08542"/></url>
<url><loc>https://scifaro.com/en/abs/fast-entropy-based-methods-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2212.08703</loc><lastmod>2023-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-entropy-based-methods-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2212.08703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-entropy-based-methods-of-word-level-confidence-estimation-for-end-to-end-automatic-speech-recognition-2212.08703"/></url>
<url><loc>https://scifaro.com/en/abs/fast-fullsubnet-accelerate-full-band-and-sub-band-fusion-model-for-single-channel-speech-enhancement-2212.09019</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-fullsubnet-accelerate-full-band-and-sub-band-fusion-model-for-single-channel-speech-enhancement-2212.09019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-fullsubnet-accelerate-full-band-and-sub-band-fusion-model-for-single-channel-speech-enhancement-2212.09019"/></url>
<url><loc>https://scifaro.com/en/abs/beats-audio-pre-training-with-acoustic-tokenizers-2212.09058</loc><lastmod>2022-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beats-audio-pre-training-with-acoustic-tokenizers-2212.09058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beats-audio-pre-training-with-acoustic-tokenizers-2212.09058"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-quality-of-neural-tts-using-long-form-content-and-multi-speaker-multi-style-modeling-2212.10075</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-quality-of-neural-tts-using-long-form-content-and-multi-speaker-multi-style-modeling-2212.10075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-quality-of-neural-tts-using-long-form-content-and-multi-speaker-multi-style-modeling-2212.10075"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-feature-method-for-prosody-rhythm-comparison-2212.10201</loc><lastmod>2022-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-feature-method-for-prosody-rhythm-comparison-2212.10201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-feature-method-for-prosody-rhythm-comparison-2212.10201"/></url>
<url><loc>https://scifaro.com/en/abs/tts-guided-training-for-accent-conversion-without-parallel-data-2212.10204</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-guided-training-for-accent-conversion-without-parallel-data-2212.10204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-guided-training-for-accent-conversion-without-parallel-data-2212.10204"/></url>
<url><loc>https://scifaro.com/en/abs/revise-self-supervised-speech-resynthesis-with-visual-input-for-universal-and-generalized-speech-enhancement-2212.11377</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revise-self-supervised-speech-resynthesis-with-visual-input-for-universal-and-generalized-speech-enhancement-2212.11377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revise-self-supervised-speech-resynthesis-with-visual-input-for-universal-and-generalized-speech-enhancement-2212.11377"/></url>
<url><loc>https://scifaro.com/en/abs/storm-a-diffusion-based-stochastic-regeneration-model-for-speech-enhancement-and-dereverberation-2212.11851</loc><lastmod>2024-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/storm-a-diffusion-based-stochastic-regeneration-model-for-speech-enhancement-and-dereverberation-2212.11851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/storm-a-diffusion-based-stochastic-regeneration-model-for-speech-enhancement-and-dereverberation-2212.11851"/></url>
<url><loc>https://scifaro.com/en/abs/hmm-based-data-augmentation-for-e2e-systems-for-building-conversational-speech-synthesis-systems-2212.11982</loc><lastmod>2022-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hmm-based-data-augmentation-for-e2e-systems-for-building-conversational-speech-synthesis-systems-2212.11982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hmm-based-data-augmentation-for-e2e-systems-for-building-conversational-speech-synthesis-systems-2212.11982"/></url>
<url><loc>https://scifaro.com/en/abs/large-raw-emotional-dataset-with-aggregation-mechanism-2212.12266</loc><lastmod>2022-12-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-raw-emotional-dataset-with-aggregation-mechanism-2212.12266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-raw-emotional-dataset-with-aggregation-mechanism-2212.12266"/></url>
<url><loc>https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-from-speech-signals-based-on-extended-model-of-room-impulse-response-2212.13009</loc><lastmod>2022-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-from-speech-signals-based-on-extended-model-of-room-impulse-response-2212.13009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-estimation-of-room-acoustic-parameters-from-speech-signals-based-on-extended-model-of-room-impulse-response-2212.13009"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-based-on-a-musical-note-position-aware-attention-mechanism-2212.13703</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-a-musical-note-position-aware-attention-mechanism-2212.13703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-a-musical-note-position-aware-attention-mechanism-2212.13703"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-active-noise-control-system-based-on-a-block-diffusion-fxlms-algorithm-with-bidirectional-communication-2212.13777</loc><lastmod>2022-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-active-noise-control-system-based-on-a-block-diffusion-fxlms-algorithm-with-bidirectional-communication-2212.13777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-active-noise-control-system-based-on-a-block-diffusion-fxlms-algorithm-with-bidirectional-communication-2212.13777"/></url>
<url><loc>https://scifaro.com/en/abs/styletts-vc-one-shot-voice-conversion-by-knowledge-transfer-from-style-based-tts-models-2212.14227</loc><lastmod>2023-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styletts-vc-one-shot-voice-conversion-by-knowledge-transfer-from-style-based-tts-models-2212.14227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styletts-vc-one-shot-voice-conversion-by-knowledge-transfer-from-style-based-tts-models-2212.14227"/></url>
<url><loc>https://scifaro.com/en/abs/resgrad-residual-denoising-diffusion-probabilistic-models-for-text-to-speech-2212.14518</loc><lastmod>2023-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resgrad-residual-denoising-diffusion-probabilistic-models-for-text-to-speech-2212.14518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resgrad-residual-denoising-diffusion-probabilistic-models-for-text-to-speech-2212.14518"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-acoustic-scene-mapping-based-on-acoustic-features-and-dimensionality-reduction-2301.00448</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-acoustic-scene-mapping-based-on-acoustic-features-and-dimensionality-reduction-2301.00448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-acoustic-scene-mapping-based-on-acoustic-features-and-dimensionality-reduction-2301.00448"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-the-selection-bias-in-voice-assistance-training-voice-assistance-model-in-python-with-equal-data-selection-2301.00646</loc><lastmod>2023-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-the-selection-bias-in-voice-assistance-training-voice-assistance-model-in-python-with-equal-data-selection-2301.00646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-the-selection-bias-in-voice-assistance-training-voice-assistance-model-in-python-with-equal-data-selection-2301.00646"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-speech-representation-learning-with-low-bit-quantization-2301.00652</loc><lastmod>2023-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-speech-representation-learning-with-low-bit-quantization-2301.00652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-speech-representation-learning-with-low-bit-quantization-2301.00652"/></url>
<url><loc>https://scifaro.com/en/abs/trinet-stabilizing-self-supervised-learning-from-complete-or-slow-collapse-on-asr-2301.00656</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trinet-stabilizing-self-supervised-learning-from-complete-or-slow-collapse-on-asr-2301.00656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trinet-stabilizing-self-supervised-learning-from-complete-or-slow-collapse-on-asr-2301.00656"/></url>
<url><loc>https://scifaro.com/en/abs/mntts2-an-open-source-multi-speaker-mongolian-text-to-speech-synthesis-dataset-2301.00657</loc><lastmod>2023-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mntts2-an-open-source-multi-speaker-mongolian-text-to-speech-synthesis-dataset-2301.00657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mntts2-an-open-source-multi-speaker-mongolian-text-to-speech-synthesis-dataset-2301.00657"/></url>
<url><loc>https://scifaro.com/en/abs/hyperuniform-disordered-parametric-loudspeaker-array-2301.00833</loc><lastmod>2023-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperuniform-disordered-parametric-loudspeaker-array-2301.00833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperuniform-disordered-parametric-loudspeaker-array-2301.00833"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-the-rhythm-from-lyrics-for-melody-generation-of-pop-song-2301.01361</loc><lastmod>2023-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-the-rhythm-from-lyrics-for-melody-generation-of-pop-song-2301.01361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-the-rhythm-from-lyrics-for-melody-generation-of-pop-song-2301.01361"/></url>
<url><loc>https://scifaro.com/en/abs/grid-based-decimation-for-wavelet-transforms-with-stably-invertible-implementation-2301.01640</loc><lastmod>2023-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/grid-based-decimation-for-wavelet-transforms-with-stably-invertible-implementation-2301.01640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/grid-based-decimation-for-wavelet-transforms-with-stably-invertible-implementation-2301.01640"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-sound-event-detection-and-classification-of-great-ape-calls-using-neural-networks-2301.02214</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-sound-event-detection-and-classification-of-great-ape-calls-using-neural-networks-2301.02214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-sound-event-detection-and-classification-of-great-ape-calls-using-neural-networks-2301.02214"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-based-on-frame-level-sequence-to-sequence-models-considering-vocal-timing-deviation-2301.02262</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-frame-level-sequence-to-sequence-models-considering-vocal-timing-deviation-2301.02262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-based-on-frame-level-sequence-to-sequence-models-considering-vocal-timing-deviation-2301.02262"/></url>
<url><loc>https://scifaro.com/en/abs/using-external-off-policy-speech-to-text-mappings-in-contextual-end-to-end-automated-speech-recognition-2301.02736</loc><lastmod>2023-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-external-off-policy-speech-to-text-mappings-in-contextual-end-to-end-automated-speech-recognition-2301.02736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-external-off-policy-speech-to-text-mappings-in-contextual-end-to-end-automated-speech-recognition-2301.02736"/></url>
<url><loc>https://scifaro.com/en/abs/modelling-low-resource-accents-without-accent-specific-tts-frontend-2301.04606</loc><lastmod>2023-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modelling-low-resource-accents-without-accent-specific-tts-frontend-2301.04606"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modelling-low-resource-accents-without-accent-specific-tts-frontend-2301.04606"/></url>
<url><loc>https://scifaro.com/en/abs/rock-guitar-tablature-generation-via-natural-language-processing-2301.05295</loc><lastmod>2023-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rock-guitar-tablature-generation-via-natural-language-processing-2301.05295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rock-guitar-tablature-generation-via-natural-language-processing-2301.05295"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-alzheimer-s-dementia-recognition-through-spontaneous-speech-a-signal-processing-grand-challenge-2301.05562</loc><lastmod>2023-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-alzheimer-s-dementia-recognition-through-spontaneous-speech-a-signal-processing-grand-challenge-2301.05562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-alzheimer-s-dementia-recognition-through-spontaneous-speech-a-signal-processing-grand-challenge-2301.05562"/></url>
<url><loc>https://scifaro.com/en/abs/modulation-spectral-features-for-speech-emotion-recognition-using-deep-neural-networks-2301.05868</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulation-spectral-features-for-speech-emotion-recognition-using-deep-neural-networks-2301.05868"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulation-spectral-features-for-speech-emotion-recognition-using-deep-neural-networks-2301.05868"/></url>
<url><loc>https://scifaro.com/en/abs/multi-resolution-location-based-training-for-multi-channel-continuous-speech-separation-2301.06458</loc><lastmod>2023-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-resolution-location-based-training-for-multi-channel-continuous-speech-separation-2301.06458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-resolution-location-based-training-for-multi-channel-continuous-speech-separation-2301.06458"/></url>
<url><loc>https://scifaro.com/en/abs/towards-voice-reconstruction-from-eeg-during-imagined-speech-2301.07173</loc><lastmod>2023-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-voice-reconstruction-from-eeg-during-imagined-speech-2301.07173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-voice-reconstruction-from-eeg-during-imagined-speech-2301.07173"/></url>
<url><loc>https://scifaro.com/en/abs/dereverberation-in-acoustic-sensor-networks-using-weighted-prediction-error-with-microphone-dependent-prediction-delays-2301.07649</loc><lastmod>2026-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dereverberation-in-acoustic-sensor-networks-using-weighted-prediction-error-with-microphone-dependent-prediction-delays-2301.07649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dereverberation-in-acoustic-sensor-networks-using-weighted-prediction-error-with-microphone-dependent-prediction-delays-2301.07649"/></url>
<url><loc>https://scifaro.com/en/abs/new-challenges-for-content-privacy-in-speech-and-audio-2301.08925</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/new-challenges-for-content-privacy-in-speech-and-audio-2301.08925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/new-challenges-for-content-privacy-in-speech-and-audio-2301.08925"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-speaker-embeddings-with-adversarial-multi-task-learning-for-age-group-classification-2301.09058</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-speaker-embeddings-with-adversarial-multi-task-learning-for-age-group-classification-2301.09058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-speaker-embeddings-with-adversarial-multi-task-learning-for-age-group-classification-2301.09058"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-of-source-and-receiver-positions-room-geometry-and-reflection-coefficients-from-a-single-room-impulse-response-2301.09198</loc><lastmod>2023-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-of-source-and-receiver-positions-room-geometry-and-reflection-coefficients-from-a-single-room-impulse-response-2301.09198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-of-source-and-receiver-positions-room-geometry-and-reflection-coefficients-from-a-single-room-impulse-response-2301.09198"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-evaluation-of-listener-envelopment-using-spatial-granular-synthesis-2301.10210</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-evaluation-of-listener-envelopment-using-spatial-granular-synthesis-2301.10210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-evaluation-of-listener-envelopment-using-spatial-granular-synthesis-2301.10210"/></url>
<url><loc>https://scifaro.com/en/abs/separate-and-diffuse-using-a-pretrained-diffusion-model-for-improving-source-separation-2301.10752</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-and-diffuse-using-a-pretrained-diffusion-model-for-improving-source-separation-2301.10752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-and-diffuse-using-a-pretrained-diffusion-model-for-improving-source-separation-2301.10752"/></url>
<url><loc>https://scifaro.com/en/abs/a-simple-model-for-pink-noise-from-amplitude-modulations-2301.11176</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-simple-model-for-pink-noise-from-amplitude-modulations-2301.11176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-simple-model-for-pink-noise-from-amplitude-modulations-2301.11176"/></url>
<url><loc>https://scifaro.com/en/abs/bayesspeech-a-bayesian-transformer-network-for-automatic-speech-recognition-2301.11276</loc><lastmod>2023-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesspeech-a-bayesian-transformer-network-for-automatic-speech-recognition-2301.11276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesspeech-a-bayesian-transformer-network-for-automatic-speech-recognition-2301.11276"/></url>
<url><loc>https://scifaro.com/en/abs/on-granularity-of-prosodic-representations-in-expressive-text-to-speech-2301.11446</loc><lastmod>2023-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-granularity-of-prosodic-representations-in-expressive-text-to-speech-2301.11446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-granularity-of-prosodic-representations-in-expressive-text-to-speech-2301.11446"/></url>
<url><loc>https://scifaro.com/en/abs/cross-domain-neural-pitch-and-periodicity-estimation-2301.12258</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-domain-neural-pitch-and-periodicity-estimation-2301.12258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-domain-neural-pitch-and-periodicity-estimation-2301.12258"/></url>
<url><loc>https://scifaro.com/en/abs/neuralkalman-a-learnable-kalman-filter-for-acoustic-echo-cancellation-2301.12363</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuralkalman-a-learnable-kalman-filter-for-acoustic-echo-cancellation-2301.12363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuralkalman-a-learnable-kalman-filter-for-acoustic-echo-cancellation-2301.12363"/></url>
<url><loc>https://scifaro.com/en/abs/learning-to-speak-from-text-zero-shot-multilingual-text-to-speech-with-unsupervised-text-pretraining-2301.12596</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-to-speak-from-text-zero-shot-multilingual-text-to-speech-with-unsupervised-text-pretraining-2301.12596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-to-speak-from-text-zero-shot-multilingual-text-to-speech-with-unsupervised-text-pretraining-2301.12596"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-acoustic-perception-for-automotive-applications-2301.12808</loc><lastmod>2023-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-acoustic-perception-for-automotive-applications-2301.12808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-acoustic-perception-for-automotive-applications-2301.12808"/></url>
<url><loc>https://scifaro.com/en/abs/myriad-a-multi-array-room-acoustic-database-2301.13057</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/myriad-a-multi-array-room-acoustic-database-2301.13057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/myriad-a-multi-array-room-acoustic-database-2301.13057"/></url>
<url><loc>https://scifaro.com/en/abs/neural-target-speech-extraction-an-overview-2301.13341</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-target-speech-extraction-an-overview-2301.13341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-target-speech-extraction-an-overview-2301.13341"/></url>
<url><loc>https://scifaro.com/en/abs/relating-eeg-to-continuous-speech-using-deep-neural-networks-a-review-2302.01736</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relating-eeg-to-continuous-speech-using-deep-neural-networks-a-review-2302.01736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relating-eeg-to-continuous-speech-using-deep-neural-networks-a-review-2302.01736"/></url>
<url><loc>https://scifaro.com/en/abs/machine-learning-extreme-acoustic-non-reciprocity-in-a-linear-waveguide-with-multiple-nonlinear-asymmetric-gates-2302.01746</loc><lastmod>2023-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/machine-learning-extreme-acoustic-non-reciprocity-in-a-linear-waveguide-with-multiple-nonlinear-asymmetric-gates-2302.01746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/machine-learning-extreme-acoustic-non-reciprocity-in-a-linear-waveguide-with-multiple-nonlinear-asymmetric-gates-2302.01746"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-fusion-techniques-for-utterance-level-emotion-recognition-from-text-and-speech-2302.02447</loc><lastmod>2023-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-fusion-techniques-for-utterance-level-emotion-recognition-from-text-and-speech-2302.02447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-fusion-techniques-for-utterance-level-emotion-recognition-from-text-and-speech-2302.02447"/></url>
<url><loc>https://scifaro.com/en/abs/residual-information-in-deep-speaker-embedding-architectures-2302.02742</loc><lastmod>2023-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/residual-information-in-deep-speaker-embedding-architectures-2302.02742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/residual-information-in-deep-speaker-embedding-architectures-2302.02742"/></url>
<url><loc>https://scifaro.com/en/abs/listen2scene-interactive-material-aware-binaural-sound-propagation-for-reconstructed-3d-scenes-2302.02809</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen2scene-interactive-material-aware-binaural-sound-propagation-for-reconstructed-3d-scenes-2302.02809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen2scene-interactive-material-aware-binaural-sound-propagation-for-reconstructed-3d-scenes-2302.02809"/></url>
<url><loc>https://scifaro.com/en/abs/masking-kernel-for-learning-energy-efficient-representations-for-speaker-recognition-and-mobile-health-2302.04161</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masking-kernel-for-learning-energy-efficient-representations-for-speaker-recognition-and-mobile-health-2302.04161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masking-kernel-for-learning-energy-efficient-representations-for-speaker-recognition-and-mobile-health-2302.04161"/></url>
<url><loc>https://scifaro.com/en/abs/a-vector-quantized-approach-for-text-to-speech-synthesis-on-real-world-spontaneous-speech-2302.04215</loc><lastmod>2023-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-vector-quantized-approach-for-text-to-speech-synthesis-on-real-world-spontaneous-speech-2302.04215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-vector-quantized-approach-for-text-to-speech-synthesis-on-real-world-spontaneous-speech-2302.04215"/></url>
<url><loc>https://scifaro.com/en/abs/a-composite-t60-regression-and-classification-approach-for-speech-dereverberation-2302.04932</loc><lastmod>2023-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-composite-t60-regression-and-classification-approach-for-speech-dereverberation-2302.04932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-composite-t60-regression-and-classification-approach-for-speech-dereverberation-2302.04932"/></url>
<url><loc>https://scifaro.com/en/abs/cross-corpora-spoken-language-identification-with-domain-diversification-and-generalization-2302.05110</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-corpora-spoken-language-identification-with-domain-diversification-and-generalization-2302.05110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-corpora-spoken-language-identification-with-domain-diversification-and-generalization-2302.05110"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-language-change-detection-inspired-by-speaker-change-detection-2302.05265</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-language-change-detection-inspired-by-speaker-change-detection-2302.05265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-language-change-detection-inspired-by-speaker-change-detection-2302.05265"/></url>
<url><loc>https://scifaro.com/en/abs/asdf-a-differential-testing-framework-for-automatic-speech-recognition-systems-2302.05582</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asdf-a-differential-testing-framework-for-automatic-speech-recognition-systems-2302.05582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asdf-a-differential-testing-framework-for-automatic-speech-recognition-systems-2302.05582"/></url>
<url><loc>https://scifaro.com/en/abs/improved-decoding-of-attentional-selection-in-multi-talker-environments-with-self-supervised-learned-speech-representation-2302.05756</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-decoding-of-attentional-selection-in-multi-talker-environments-with-self-supervised-learned-speech-representation-2302.05756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-decoding-of-attentional-selection-in-multi-talker-environments-with-self-supervised-learned-speech-representation-2302.05756"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-small-footprint-hybrid-hmm-hifigan-based-system-for-speech-synthesis-in-indian-languages-2302.06227</loc><lastmod>2023-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-small-footprint-hybrid-hmm-hifigan-based-system-for-speech-synthesis-in-indian-languages-2302.06227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-small-footprint-hybrid-hmm-hifigan-based-system-for-speech-synthesis-in-indian-languages-2302.06227"/></url>
<url><loc>https://scifaro.com/en/abs/av-data2vec-self-supervised-learning-of-audio-visual-speech-representations-with-contextualized-target-representations-2302.06419</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-data2vec-self-supervised-learning-of-audio-visual-speech-representations-with-contextualized-target-representations-2302.06419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-data2vec-self-supervised-learning-of-audio-visual-speech-representations-with-contextualized-target-representations-2302.06419"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-acoustic-to-articulatory-speech-inversion-2302.06774</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-acoustic-to-articulatory-speech-inversion-2302.06774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-acoustic-to-articulatory-speech-inversion-2302.06774"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-contrastive-learning-from-musical-audio-2302.07077</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-contrastive-learning-from-musical-audio-2302.07077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-contrastive-learning-from-musical-audio-2302.07077"/></url>
<url><loc>https://scifaro.com/en/abs/a-dataset-for-audio-visual-sound-event-detection-in-movies-2302.07315</loc><lastmod>2023-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dataset-for-audio-visual-sound-event-detection-in-movies-2302.07315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dataset-for-audio-visual-sound-event-detection-in-movies-2302.07315"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-score-based-speaker-adaptation-of-conformer-speech-recognition-systems-2302.07521</loc><lastmod>2023-02-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-score-based-speaker-adaptation-of-conformer-speech-recognition-systems-2302.07521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-score-based-speaker-adaptation-of-conformer-speech-recognition-systems-2302.07521"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-blind-speech-copy-move-detection-and-localization-in-noise-2302.07584</loc><lastmod>2023-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-blind-speech-copy-move-detection-and-localization-in-noise-2302.07584"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-blind-speech-copy-move-detection-and-localization-in-noise-2302.07584"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-target-speaker-extraction-with-refinement-the-wavlab-submission-to-the-second-clarity-enhancement-challenge-2302.07928</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-target-speaker-extraction-with-refinement-the-wavlab-submission-to-the-second-clarity-enhancement-challenge-2302.07928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-target-speaker-extraction-with-refinement-the-wavlab-submission-to-the-second-clarity-enhancement-challenge-2302.07928"/></url>
<url><loc>https://scifaro.com/en/abs/deepspace-dynamic-spatial-and-source-cue-based-source-separation-for-dialog-enhancement-2302.08202</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepspace-dynamic-spatial-and-source-cue-based-source-separation-for-dialog-enhancement-2302.08202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepspace-dynamic-spatial-and-source-cue-based-source-separation-for-dialog-enhancement-2302.08202"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-multi-granularity-vector-quantization-2302.08342</loc><lastmod>2023-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-multi-granularity-vector-quantization-2302.08342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-multi-granularity-vector-quantization-2302.08342"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-change-detection-for-transformer-transducer-asr-2302.08549</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-change-detection-for-transformer-transducer-asr-2302.08549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-change-detection-for-transformer-transducer-asr-2302.08549"/></url>
<url><loc>https://scifaro.com/en/abs/adaptable-end-to-end-asr-models-using-replaceable-internal-lms-and-residual-softmax-2302.08579</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptable-end-to-end-asr-models-using-replaceable-internal-lms-and-residual-softmax-2302.08579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptable-end-to-end-asr-models-using-replaceable-internal-lms-and-residual-softmax-2302.08579"/></url>
<url><loc>https://scifaro.com/en/abs/jeit-joint-end-to-end-model-and-internal-language-model-training-for-speech-recognition-2302.08583</loc><lastmod>2023-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jeit-joint-end-to-end-model-and-internal-language-model-training-for-speech-recognition-2302.08583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jeit-joint-end-to-end-model-and-internal-language-model-training-for-speech-recognition-2302.08583"/></url>
<url><loc>https://scifaro.com/en/abs/improving-transformer-based-networks-with-locality-for-automatic-speaker-verification-2302.08639</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-transformer-based-networks-with-locality-for-automatic-speaker-verification-2302.08639"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-transformer-based-networks-with-locality-for-automatic-speaker-verification-2302.08639"/></url>
<url><loc>https://scifaro.com/en/abs/deep-ahs-a-deep-learning-approach-to-acoustic-howling-suppression-2302.09252</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-ahs-a-deep-learning-approach-to-acoustic-howling-suppression-2302.09252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-ahs-a-deep-learning-approach-to-acoustic-howling-suppression-2302.09252"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dimensional-frequency-dynamic-convolution-with-confident-mean-teacher-for-sound-event-detection-2302.09256</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dimensional-frequency-dynamic-convolution-with-confident-mean-teacher-for-sound-event-detection-2302.09256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dimensional-frequency-dynamic-convolution-with-confident-mean-teacher-for-sound-event-detection-2302.09256"/></url>
<url><loc>https://scifaro.com/en/abs/front-end-adapter-adapting-front-end-input-of-speech-based-self-supervised-learning-for-speech-recognition-2302.09331</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/front-end-adapter-adapting-front-end-input-of-speech-based-self-supervised-learning-for-speech-recognition-2302.09331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/front-end-adapter-adapting-front-end-input-of-speech-based-self-supervised-learning-for-speech-recognition-2302.09331"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-language-change-detection-using-wav2vec2-and-whisper-2302.09381</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-language-change-detection-using-wav2vec2-and-whisper-2302.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-language-change-detection-using-wav2vec2-and-whisper-2302.09381"/></url>
<url><loc>https://scifaro.com/en/abs/robustdistiller-compressing-universal-speech-representations-for-enhanced-environment-robustness-2302.09437</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustdistiller-compressing-universal-speech-representations-for-enhanced-environment-robustness-2302.09437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustdistiller-compressing-universal-speech-representations-for-enhanced-environment-robustness-2302.09437"/></url>
<url><loc>https://scifaro.com/en/abs/probabilistic-back-ends-for-online-speaker-recognition-and-clustering-2302.09523</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probabilistic-back-ends-for-online-speaker-recognition-and-clustering-2302.09523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probabilistic-back-ends-for-online-speaker-recognition-and-clustering-2302.09523"/></url>
<url><loc>https://scifaro.com/en/abs/synergy-between-human-and-machine-approaches-to-sound-scene-recognition-and-processing-an-overview-of-icassp-special-session-2302.09719</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synergy-between-human-and-machine-approaches-to-sound-scene-recognition-and-processing-an-overview-of-icassp-special-session-2302.09719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synergy-between-human-and-machine-approaches-to-sound-scene-recognition-and-processing-an-overview-of-icassp-special-session-2302.09719"/></url>
<url><loc>https://scifaro.com/en/abs/an-asr-free-fluency-scoring-approach-with-self-supervised-learning-2302.09928</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-asr-free-fluency-scoring-approach-with-self-supervised-learning-2302.09928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-asr-free-fluency-scoring-approach-with-self-supervised-learning-2302.09928"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-speech-enhancement-combining-band-split-rnn-and-speaker-attentive-module-2302.09953</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-speech-enhancement-combining-band-split-rnn-and-speaker-attentive-module-2302.09953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-speech-enhancement-combining-band-split-rnn-and-speaker-attentive-module-2302.09953"/></url>
<url><loc>https://scifaro.com/en/abs/learnable-frontends-that-do-not-learn-quantifying-sensitivity-to-filterbank-initialisation-2302.10014</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnable-frontends-that-do-not-learn-quantifying-sensitivity-to-filterbank-initialisation-2302.10014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnable-frontends-that-do-not-learn-quantifying-sensitivity-to-filterbank-initialisation-2302.10014"/></url>
<url><loc>https://scifaro.com/en/abs/stabilising-and-accelerating-light-gated-recurrent-units-for-automatic-speech-recognition-2302.10144</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stabilising-and-accelerating-light-gated-recurrent-units-for-automatic-speech-recognition-2302.10144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stabilising-and-accelerating-light-gated-recurrent-units-for-automatic-speech-recognition-2302.10144"/></url>
<url><loc>https://scifaro.com/en/abs/a-dnn-based-normalized-time-frequency-weighted-criterion-for-robust-wideband-doa-estimation-2302.10147</loc><lastmod>2023-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dnn-based-normalized-time-frequency-weighted-criterion-for-robust-wideband-doa-estimation-2302.10147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dnn-based-normalized-time-frequency-weighted-criterion-for-robust-wideband-doa-estimation-2302.10147"/></url>
<url><loc>https://scifaro.com/en/abs/e2e-spoken-entity-extraction-for-virtual-agents-2302.10186</loc><lastmod>2023-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e2e-spoken-entity-extraction-for-virtual-agents-2302.10186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e2e-spoken-entity-extraction-for-virtual-agents-2302.10186"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-enhancement-using-spectral-subtraction-with-minimum-statistics-and-spectral-floor-2302.10313</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-enhancement-using-spectral-subtraction-with-minimum-statistics-and-spectral-floor-2302.10313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-enhancement-using-spectral-subtraction-with-minimum-statistics-and-spectral-floor-2302.10313"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-enhancement-with-dynamic-attention-span-2302.10377</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-enhancement-with-dynamic-attention-span-2302.10377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-enhancement-with-dynamic-attention-span-2302.10377"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-phone-level-linguistic-acoustic-similarity-for-utterance-level-pronunciation-scoring-2302.10444</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-phone-level-linguistic-acoustic-similarity-for-utterance-level-pronunciation-scoring-2302.10444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-phone-level-linguistic-acoustic-similarity-for-utterance-level-pronunciation-scoring-2302.10444"/></url>
<url><loc>https://scifaro.com/en/abs/federated-learning-for-asr-based-on-wav2vec-2-0-2302.10790</loc><lastmod>2023-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-learning-for-asr-based-on-wav2vec-2-0-2302.10790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-learning-for-asr-based-on-wav2vec-2-0-2302.10790"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-speech-enhancement-and-separation-with-gradient-modulation-for-end-to-end-noise-robust-speech-separation-2302.11131</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-speech-enhancement-and-separation-with-gradient-modulation-for-end-to-end-noise-robust-speech-separation-2302.11131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-speech-enhancement-and-separation-with-gradient-modulation-for-end-to-end-noise-robust-speech-separation-2302.11131"/></url>
<url><loc>https://scifaro.com/en/abs/uml-a-universal-monolingual-output-layer-for-multilingual-asr-2302.11186</loc><lastmod>2023-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uml-a-universal-monolingual-output-layer-for-multilingual-asr-2302.11186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uml-a-universal-monolingual-output-layer-for-multilingual-asr-2302.11186"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-representation-learning-for-acoustic-parameter-estimation-2302.11205</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-representation-learning-for-acoustic-parameter-estimation-2302.11205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-representation-learning-for-acoustic-parameter-estimation-2302.11205"/></url>
<url><loc>https://scifaro.com/en/abs/gradient-remedy-for-multi-task-learning-in-end-to-end-noise-robust-speech-recognition-2302.11362</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gradient-remedy-for-multi-task-learning-in-end-to-end-noise-robust-speech-recognition-2302.11362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gradient-remedy-for-multi-task-learning-in-end-to-end-noise-robust-speech-recognition-2302.11362"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-uncertainty-from-speaker-embedding-estimation-to-speaker-verification-2302.11763</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-uncertainty-from-speaker-embedding-estimation-to-speaker-verification-2302.11763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-uncertainty-from-speaker-embedding-estimation-to-speaker-verification-2302.11763"/></url>
<url><loc>https://scifaro.com/en/abs/a-framework-for-unified-real-time-personalized-and-non-personalized-speech-enhancement-2302.11768</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-framework-for-unified-real-time-personalized-and-non-personalized-speech-enhancement-2302.11768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-framework-for-unified-real-time-personalized-and-non-personalized-speech-enhancement-2302.11768"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-bin-wise-single-channel-speech-presence-probability-estimation-using-multiple-dnns-2302.12048</loc><lastmod>2023-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-bin-wise-single-channel-speech-presence-probability-estimation-using-multiple-dnns-2302.12048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-bin-wise-single-channel-speech-presence-probability-estimation-using-multiple-dnns-2302.12048"/></url>
<url><loc>https://scifaro.com/en/abs/factual-consistency-oriented-speech-recognition-2302.12369</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/factual-consistency-oriented-speech-recognition-2302.12369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/factual-consistency-oriented-speech-recognition-2302.12369"/></url>
<url><loc>https://scifaro.com/en/abs/pits-variational-pitch-inference-without-fundamental-frequency-for-end-to-end-pitch-controllable-tts-2302.12391</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pits-variational-pitch-inference-without-fundamental-frequency-for-end-to-end-pitch-controllable-tts-2302.12391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pits-variational-pitch-inference-without-fundamental-frequency-for-end-to-end-pitch-controllable-tts-2302.12391"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-knowledge-distillation-of-self-supervised-speech-models-2302.12757</loc><lastmod>2023-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-knowledge-distillation-of-self-supervised-speech-models-2302.12757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-knowledge-distillation-of-self-supervised-speech-models-2302.12757"/></url>
<url><loc>https://scifaro.com/en/abs/time-variance-aware-real-time-speech-enhancement-2302.13063</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-variance-aware-real-time-speech-enhancement-2302.13063"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-variance-aware-real-time-speech-enhancement-2302.13063"/></url>
<url><loc>https://scifaro.com/en/abs/i-msv-2022-indic-multilingual-and-multi-sensor-speaker-verification-challenge-2302.13209</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/i-msv-2022-indic-multilingual-and-multi-sensor-speaker-verification-challenge-2302.13209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/i-msv-2022-indic-multilingual-and-multi-sensor-speaker-verification-challenge-2302.13209"/></url>
<url><loc>https://scifaro.com/en/abs/dfsnet-a-steerable-neural-beamformer-invariant-to-microphone-array-configuration-for-real-time-low-latency-speech-enhancement-2302.13407</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dfsnet-a-steerable-neural-beamformer-invariant-to-microphone-array-configuration-for-real-time-low-latency-speech-enhancement-2302.13407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dfsnet-a-steerable-neural-beamformer-invariant-to-microphone-array-configuration-for-real-time-low-latency-speech-enhancement-2302.13407"/></url>
<url><loc>https://scifaro.com/en/abs/varianceflow-high-quality-and-controllable-text-to-speech-using-variance-information-via-normalizing-flow-2302.13458</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/varianceflow-high-quality-and-controllable-text-to-speech-using-variance-information-via-normalizing-flow-2302.13458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/varianceflow-high-quality-and-controllable-text-to-speech-using-variance-information-via-normalizing-flow-2302.13458"/></url>
<url><loc>https://scifaro.com/en/abs/complex-clipping-for-improved-generalization-in-machine-learning-2302.13527</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complex-clipping-for-improved-generalization-in-machine-learning-2302.13527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complex-clipping-for-improved-generalization-in-machine-learning-2302.13527"/></url>
<url><loc>https://scifaro.com/en/abs/duration-aware-pause-insertion-using-pre-trained-language-model-for-multi-speaker-text-to-speech-2302.13652</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/duration-aware-pause-insertion-using-pre-trained-language-model-for-multi-speaker-text-to-speech-2302.13652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/duration-aware-pause-insertion-using-pre-trained-language-model-for-multi-speaker-text-to-speech-2302.13652"/></url>
<url><loc>https://scifaro.com/en/abs/mole-mixture-of-language-experts-for-multi-lingual-automatic-speech-recognition-2302.13750</loc><lastmod>2023-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mole-mixture-of-language-experts-for-multi-lingual-automatic-speech-recognition-2302.13750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mole-mixture-of-language-experts-for-multi-lingual-automatic-speech-recognition-2302.13750"/></url>
<url><loc>https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-asr-using-integrated-text-to-mel-spectrogram-generator-2302.14036</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-asr-using-integrated-text-to-mel-spectrogram-generator-2302.14036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-only-domain-adaptation-for-end-to-end-asr-using-integrated-text-to-mel-spectrogram-generator-2302.14036"/></url>
<url><loc>https://scifaro.com/en/abs/diagonal-state-space-augmented-transformers-for-speech-recognition-2302.14120</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diagonal-state-space-augmented-transformers-for-speech-recognition-2302.14120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diagonal-state-space-augmented-transformers-for-speech-recognition-2302.14120"/></url>
<url><loc>https://scifaro.com/en/abs/training-sound-event-detection-with-soft-labels-from-crowdsourced-annotations-2302.14572</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-sound-event-detection-with-soft-labels-from-crowdsourced-annotations-2302.14572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-sound-event-detection-with-soft-labels-from-crowdsourced-annotations-2302.14572"/></url>
<url><loc>https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-for-paralinguistic-speech-processing-2302.14638</loc><lastmod>2023-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-for-paralinguistic-speech-processing-2302.14638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechformer-a-hierarchical-efficient-framework-for-paralinguistic-speech-processing-2302.14638"/></url>
<url><loc>https://scifaro.com/en/abs/reducing-the-prior-mismatch-of-stochastic-differential-equations-for-diffusion-based-speech-enhancement-2302.14748</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reducing-the-prior-mismatch-of-stochastic-differential-equations-for-diffusion-based-speech-enhancement-2302.14748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reducing-the-prior-mismatch-of-stochastic-differential-equations-for-diffusion-based-speech-enhancement-2302.14748"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-learning-of-acoustic-scenes-and-sound-events-2302.14815</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-learning-of-acoustic-scenes-and-sound-events-2302.14815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-learning-of-acoustic-scenes-and-sound-events-2302.14815"/></url>
<url><loc>https://scifaro.com/en/abs/improving-medical-speech-to-text-accuracy-with-vision-language-pre-training-model-2303.00091</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-medical-speech-to-text-accuracy-with-vision-language-pre-training-model-2303.00091"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-medical-speech-to-text-accuracy-with-vision-language-pre-training-model-2303.00091"/></url>
<url><loc>https://scifaro.com/en/abs/first-shot-anomaly-sound-detection-for-machine-condition-monitoring-a-domain-generalization-baseline-2303.00455</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/first-shot-anomaly-sound-detection-for-machine-condition-monitoring-a-domain-generalization-baseline-2303.00455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/first-shot-anomaly-sound-detection-for-machine-condition-monitoring-a-domain-generalization-baseline-2303.00455"/></url>
<url><loc>https://scifaro.com/en/abs/extending-dnn-based-multiplicative-masking-to-deep-subband-filtering-for-improved-dereverberation-2303.00529</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-dnn-based-multiplicative-masking-to-deep-subband-filtering-for-improved-dereverberation-2303.00529"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-dnn-based-multiplicative-masking-to-deep-subband-filtering-for-improved-dereverberation-2303.00529"/></url>
<url><loc>https://scifaro.com/en/abs/towards-domain-generalisation-in-asr-with-elitist-sampling-and-ensemble-knowledge-distillation-2303.00550</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-domain-generalisation-in-asr-with-elitist-sampling-and-ensemble-knowledge-distillation-2303.00550"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-domain-generalisation-in-asr-with-elitist-sampling-and-ensemble-knowledge-distillation-2303.00550"/></url>
<url><loc>https://scifaro.com/en/abs/audb-sharing-and-versioning-of-audio-and-annotation-data-in-python-2303.00645</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audb-sharing-and-versioning-of-audio-and-annotation-data-in-python-2303.00645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audb-sharing-and-versioning-of-audio-and-annotation-data-in-python-2303.00645"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-redundancy-in-multiple-audio-signals-for-far-field-speech-recognition-2303.00692</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-redundancy-in-multiple-audio-signals-for-far-field-speech-recognition-2303.00692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-redundancy-in-multiple-audio-signals-for-far-field-speech-recognition-2303.00692"/></url>
<url><loc>https://scifaro.com/en/abs/speechprompt-v2-prompt-tuning-for-speech-classification-tasks-2303.00733</loc><lastmod>2023-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechprompt-v2-prompt-tuning-for-speech-classification-tasks-2303.00733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechprompt-v2-prompt-tuning-for-speech-classification-tasks-2303.00733"/></url>
<url><loc>https://scifaro.com/en/abs/ego-noise-reduction-of-a-mobile-robot-using-noise-spatial-covariance-matrix-learning-and-minimum-variance-distortionless-response-2303.00829</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ego-noise-reduction-of-a-mobile-robot-using-noise-spatial-covariance-matrix-learning-and-minimum-variance-distortionless-response-2303.00829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ego-noise-reduction-of-a-mobile-robot-using-noise-spatial-covariance-matrix-learning-and-minimum-variance-distortionless-response-2303.00829"/></url>
<url><loc>https://scifaro.com/en/abs/displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2303.00830</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2303.00830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2303.00830"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-audio-video-enhancement-with-a-microphone-array-and-headphones-2303.00949</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-audio-video-enhancement-with-a-microphone-array-and-headphones-2303.00949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-audio-video-enhancement-with-a-microphone-array-and-headphones-2303.00949"/></url>
<url><loc>https://scifaro.com/en/abs/improving-transformer-based-end-to-end-speaker-diarization-by-assigning-auxiliary-losses-to-attention-heads-2303.01192</loc><lastmod>2023-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-transformer-based-end-to-end-speaker-diarization-by-assigning-auxiliary-losses-to-attention-heads-2303.01192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-transformer-based-end-to-end-speaker-diarization-by-assigning-auxiliary-losses-to-attention-heads-2303.01192"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-into-the-adaptability-of-a-diffusion-based-tts-model-2303.01849</loc><lastmod>2023-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-into-the-adaptability-of-a-diffusion-based-tts-model-2303.01849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-into-the-adaptability-of-a-diffusion-based-tts-model-2303.01849"/></url>
<url><loc>https://scifaro.com/en/abs/fixed-point-quantization-aware-training-for-on-device-keyword-spotting-2303.02284</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fixed-point-quantization-aware-training-for-on-device-keyword-spotting-2303.02284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fixed-point-quantization-aware-training-for-on-device-keyword-spotting-2303.02284"/></url>
<url><loc>https://scifaro.com/en/abs/requirements-for-mass-adoption-of-assistive-listening-technology-by-the-general-public-2303.02523</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/requirements-for-mass-adoption-of-assistive-listening-technology-by-the-general-public-2303.02523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/requirements-for-mass-adoption-of-assistive-listening-technology-by-the-general-public-2303.02523"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representations-in-read-and-spontaneous-tts-2303.02719</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representations-in-read-and-spontaneous-tts-2303.02719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-self-supervised-speech-representations-in-read-and-spontaneous-tts-2303.02719"/></url>
<url><loc>https://scifaro.com/en/abs/foundationtts-text-to-speech-for-asr-customization-with-generative-language-model-2303.02939</loc><lastmod>2023-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foundationtts-text-to-speech-for-asr-customization-with-generative-language-model-2303.02939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foundationtts-text-to-speech-for-asr-customization-with-generative-language-model-2303.02939"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-alzheimer-s-detection-from-spontaneous-speech-2303.03049</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-alzheimer-s-detection-from-spontaneous-speech-2303.03049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-transfer-learning-for-alzheimer-s-detection-from-spontaneous-speech-2303.03049"/></url>
<url><loc>https://scifaro.com/en/abs/pre-trained-model-representations-and-their-robustness-against-noise-for-speech-emotion-analysis-2303.03177</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-trained-model-representations-and-their-robustness-against-noise-for-speech-emotion-analysis-2303.03177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-trained-model-representations-and-their-robustness-against-noise-for-speech-emotion-analysis-2303.03177"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-a-survey-2303.03329</loc><lastmod>2023-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-a-survey-2303.03329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-a-survey-2303.03329"/></url>
<url><loc>https://scifaro.com/en/abs/self-film-conditioning-gans-with-self-supervised-representations-for-bandwidth-extension-based-speaker-recognition-2303.03657</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-film-conditioning-gans-with-self-supervised-representations-for-bandwidth-extension-based-speaker-recognition-2303.03657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-film-conditioning-gans-with-self-supervised-representations-for-bandwidth-extension-based-speaker-recognition-2303.03657"/></url>
<url><loc>https://scifaro.com/en/abs/ast-sed-an-effective-sound-event-detection-method-based-on-audio-spectrogram-transformer-2303.03689</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ast-sed-an-effective-sound-event-detection-method-based-on-audio-spectrogram-transformer-2303.03689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ast-sed-an-effective-sound-event-detection-method-based-on-audio-spectrogram-transformer-2303.03689"/></url>
<url><loc>https://scifaro.com/en/abs/ts-sep-joint-diarization-and-separation-conditioned-on-estimated-speaker-embeddings-2303.03849</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ts-sep-joint-diarization-and-separation-conditioned-on-estimated-speaker-embeddings-2303.03849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ts-sep-joint-diarization-and-separation-conditioned-on-estimated-speaker-embeddings-2303.03849"/></url>
<url><loc>https://scifaro.com/en/abs/kernel-interpolation-of-acoustic-transfer-functions-with-adaptive-kernel-for-directed-and-residual-reverberations-2303.03869</loc><lastmod>2023-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kernel-interpolation-of-acoustic-transfer-functions-with-adaptive-kernel-for-directed-and-residual-reverberations-2303.03869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kernel-interpolation-of-acoustic-transfer-functions-with-adaptive-kernel-for-directed-and-residual-reverberations-2303.03869"/></url>
<url><loc>https://scifaro.com/en/abs/x-sepformer-end-to-end-speaker-extraction-network-with-explicit-optimization-on-speaker-confusion-2303.05023</loc><lastmod>2023-03-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/x-sepformer-end-to-end-speaker-extraction-network-with-explicit-optimization-on-speaker-confusion-2303.05023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/x-sepformer-end-to-end-speaker-extraction-network-with-explicit-optimization-on-speaker-confusion-2303.05023"/></url>
<url><loc>https://scifaro.com/en/abs/unfused-unsupervised-finetuning-using-self-supervised-distillation-2303.05668</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unfused-unsupervised-finetuning-using-self-supervised-distillation-2303.05668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unfused-unsupervised-finetuning-using-self-supervised-distillation-2303.05668"/></url>
<url><loc>https://scifaro.com/en/abs/clinical-bertscore-an-improved-measure-of-automatic-speech-recognition-performance-in-clinical-settings-2303.05737</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clinical-bertscore-an-improved-measure-of-automatic-speech-recognition-performance-in-clinical-settings-2303.05737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clinical-bertscore-an-improved-measure-of-automatic-speech-recognition-performance-in-clinical-settings-2303.05737"/></url>
<url><loc>https://scifaro.com/en/abs/distribution-preserving-source-separation-with-time-frequency-predictive-models-2303.05896</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distribution-preserving-source-separation-with-time-frequency-predictive-models-2303.05896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distribution-preserving-source-separation-with-time-frequency-predictive-models-2303.05896"/></url>
<url><loc>https://scifaro.com/en/abs/wav2vec-and-its-current-potential-to-automatic-speech-recognition-in-german-for-the-usage-in-digital-history-a-comparative-assessment-of-available-asr-technologies-for-the-use-in-cultural-heritage-contexts-2303.06026</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2vec-and-its-current-potential-to-automatic-speech-recognition-in-german-for-the-usage-in-digital-history-a-comparative-assessment-of-available-asr-technologies-for-the-use-in-cultural-heritage-contexts-2303.06026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2vec-and-its-current-potential-to-automatic-speech-recognition-in-german-for-the-usage-in-digital-history-a-comparative-assessment-of-available-asr-technologies-for-the-use-in-cultural-heritage-contexts-2303.06026"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-neural-network-for-image-to-audio-transformation-2303.06078</loc><lastmod>2023-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-neural-network-for-image-to-audio-transformation-2303.06078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-neural-network-for-image-to-audio-transformation-2303.06078"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-aslp-system-for-audio-visual-speech-recognition-in-misp-2022-challenge-2303.06341</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-aslp-system-for-audio-visual-speech-recognition-in-misp-2022-challenge-2303.06341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-aslp-system-for-audio-visual-speech-recognition-in-misp-2022-challenge-2303.06341"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-sub-band-network-for-deep-residual-echo-suppression-2303.06404</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-sub-band-network-for-deep-residual-echo-suppression-2303.06404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-sub-band-network-for-deep-residual-echo-suppression-2303.06404"/></url>
<url><loc>https://scifaro.com/en/abs/relating-eeg-recordings-to-speech-using-envelope-tracking-and-the-speech-ffr-2303.06435</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relating-eeg-recordings-to-speech-using-envelope-tracking-and-the-speech-ffr-2303.06435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relating-eeg-recordings-to-speech-using-envelope-tracking-and-the-speech-ffr-2303.06435"/></url>
<url><loc>https://scifaro.com/en/abs/transcription-free-filler-word-detection-with-neural-semi-crfs-2303.06475</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcription-free-filler-word-detection-with-neural-semi-crfs-2303.06475"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcription-free-filler-word-detection-with-neural-semi-crfs-2303.06475"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2023-speech-signal-improvement-challenge-2303.06566</loc><lastmod>2023-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2023-speech-signal-improvement-challenge-2303.06566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2023-speech-signal-improvement-challenge-2303.06566"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-strategies-for-faster-inference-using-speech-self-supervised-models-a-comparative-study-2303.06740</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-strategies-for-faster-inference-using-speech-self-supervised-models-a-comparative-study-2303.06740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-strategies-for-faster-inference-using-speech-self-supervised-models-a-comparative-study-2303.06740"/></url>
<url><loc>https://scifaro.com/en/abs/neural-diarization-with-non-autoregressive-intermediate-attractors-2303.06806</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-diarization-with-non-autoregressive-intermediate-attractors-2303.06806"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-diarization-with-non-autoregressive-intermediate-attractors-2303.06806"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-elevoc-personalized-speech-enhancement-system-for-icassp2023-dns-challenge-2303.06811</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-elevoc-personalized-speech-enhancement-system-for-icassp2023-dns-challenge-2303.06811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-elevoc-personalized-speech-enhancement-system-for-icassp2023-dns-challenge-2303.06811"/></url>
<url><loc>https://scifaro.com/en/abs/two-step-band-split-neural-network-approach-for-full-band-residual-echo-suppression-2303.06828</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-step-band-split-neural-network-approach-for-full-band-residual-echo-suppression-2303.06828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-step-band-split-neural-network-approach-for-full-band-residual-echo-suppression-2303.06828"/></url>
<url><loc>https://scifaro.com/en/abs/learning-based-robust-speaker-counting-and-separation-with-the-aid-of-spatial-coherence-2303.06867</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-based-robust-speaker-counting-and-separation-with-the-aid-of-spatial-coherence-2303.06867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-based-robust-speaker-counting-and-separation-with-the-aid-of-spatial-coherence-2303.06867"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-audio-visual-end-to-end-speech-enhancement-2303.07005</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-audio-visual-end-to-end-speech-enhancement-2303.07005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-audio-visual-end-to-end-speech-enhancement-2303.07005"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-dereverberation-noise-and-interferer-reduction-using-sparse-weighted-linearly-constrained-minimum-power-beamforming-2303.07027</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-dereverberation-noise-and-interferer-reduction-using-sparse-weighted-linearly-constrained-minimum-power-beamforming-2303.07027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-dereverberation-noise-and-interferer-reduction-using-sparse-weighted-linearly-constrained-minimum-power-beamforming-2303.07027"/></url>
<url><loc>https://scifaro.com/en/abs/can-spoofing-countermeasure-and-speaker-verification-systems-be-jointly-optimised-2303.07073</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-spoofing-countermeasure-and-speaker-verification-systems-be-jointly-optimised-2303.07073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-spoofing-countermeasure-and-speaker-verification-systems-be-jointly-optimised-2303.07073"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-speaker-separation-by-spatial-regions-2303.07143</loc><lastmod>2023-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-speaker-separation-by-spatial-regions-2303.07143"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-speaker-separation-by-spatial-regions-2303.07143"/></url>
<url><loc>https://scifaro.com/en/abs/a-processing-framework-to-access-large-quantities-of-whispered-speech-found-in-asmr-2303.07442</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-processing-framework-to-access-large-quantities-of-whispered-speech-found-in-asmr-2303.07442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-processing-framework-to-access-large-quantities-of-whispered-speech-found-in-asmr-2303.07442"/></url>
<url><loc>https://scifaro.com/en/abs/blind-acoustic-room-parameter-estimation-using-phase-features-2303.07449</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-acoustic-room-parameter-estimation-using-phase-features-2303.07449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-acoustic-room-parameter-estimation-using-phase-features-2303.07449"/></url>
<url><loc>https://scifaro.com/en/abs/online-binaural-speech-separation-of-moving-speakers-with-a-wavesplit-network-2303.07458</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-binaural-speech-separation-of-moving-speakers-with-a-wavesplit-network-2303.07458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-binaural-speech-separation-of-moving-speakers-with-a-wavesplit-network-2303.07458"/></url>
<url><loc>https://scifaro.com/en/abs/guided-speech-enhancement-network-2303.07486</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-speech-enhancement-network-2303.07486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-speech-enhancement-network-2303.07486"/></url>
<url><loc>https://scifaro.com/en/abs/speech-intelligibility-classifiers-from-550k-disordered-speech-samples-2303.07533</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-intelligibility-classifiers-from-550k-disordered-speech-samples-2303.07533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-intelligibility-classifiers-from-550k-disordered-speech-samples-2303.07533"/></url>
<url><loc>https://scifaro.com/en/abs/towards-real-time-single-channel-speech-separation-in-noisy-and-reverberant-environments-2303.07569</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-real-time-single-channel-speech-separation-in-noisy-and-reverberant-environments-2303.07569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-real-time-single-channel-speech-separation-in-noisy-and-reverberant-environments-2303.07569"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-feature-encoder-for-wake-up-word-detection-based-on-self-supervised-speech-representation-2303.07592</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-feature-encoder-for-wake-up-word-detection-based-on-self-supervised-speech-representation-2303.07592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-feature-encoder-for-wake-up-word-detection-based-on-self-supervised-speech-representation-2303.07592"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-neural-network-for-icassp-2023-speech-signal-improvement-challenge-2303.07621</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-neural-network-for-icassp-2023-speech-signal-improvement-challenge-2303.07621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-neural-network-for-icassp-2023-speech-signal-improvement-challenge-2303.07621"/></url>
<url><loc>https://scifaro.com/en/abs/tea-pse-3-0-tencent-ethereal-audio-lab-personalized-speech-enhancement-system-for-icassp-2023-dns-challenge-2303.07704</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tea-pse-3-0-tencent-ethereal-audio-lab-personalized-speech-enhancement-system-for-icassp-2023-dns-challenge-2303.07704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tea-pse-3-0-tencent-ethereal-audio-lab-personalized-speech-enhancement-system-for-icassp-2023-dns-challenge-2303.07704"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-masking-with-learnable-filterbank-for-sound-source-separation-2303.07816</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-masking-with-learnable-filterbank-for-sound-source-separation-2303.07816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-masking-with-learnable-filterbank-for-sound-source-separation-2303.07816"/></url>
<url><loc>https://scifaro.com/en/abs/native-multi-band-audio-coding-within-hyper-autoencoded-reconstruction-propagation-networks-2303.08005</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/native-multi-band-audio-coding-within-hyper-autoencoded-reconstruction-propagation-networks-2303.08005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/native-multi-band-audio-coding-within-hyper-autoencoded-reconstruction-propagation-networks-2303.08005"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-pretrained-representations-with-task-related-keywords-for-alzheimer-s-disease-detection-2303.08019</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-pretrained-representations-with-task-related-keywords-for-alzheimer-s-disease-detection-2303.08019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-pretrained-representations-with-task-related-keywords-for-alzheimer-s-disease-detection-2303.08019"/></url>
<url><loc>https://scifaro.com/en/abs/a-hierarchical-regression-chain-framework-for-affective-vocal-burst-recognition-2303.08027</loc><lastmod>2023-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hierarchical-regression-chain-framework-for-affective-vocal-burst-recognition-2303.08027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hierarchical-regression-chain-framework-for-affective-vocal-burst-recognition-2303.08027"/></url>
<url><loc>https://scifaro.com/en/abs/localizing-spatial-information-in-neural-spatiospectral-filters-2303.08052</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localizing-spatial-information-in-neural-spatiospectral-filters-2303.08052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localizing-spatial-information-in-neural-spatiospectral-filters-2303.08052"/></url>
<url><loc>https://scifaro.com/en/abs/sharing-low-rank-conformer-weights-for-tiny-always-on-ambient-speech-recognition-models-2303.08343</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sharing-low-rank-conformer-weights-for-tiny-always-on-ambient-speech-recognition-models-2303.08343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sharing-low-rank-conformer-weights-for-tiny-always-on-ambient-speech-recognition-models-2303.08343"/></url>
<url><loc>https://scifaro.com/en/abs/target-sound-extraction-with-variable-cross-modality-clues-2303.08372</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-sound-extraction-with-variable-cross-modality-clues-2303.08372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-sound-extraction-with-variable-cross-modality-clues-2303.08372"/></url>
<url><loc>https://scifaro.com/en/abs/implementing-continuous-hrtf-measurement-in-near-field-2303.08379</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementing-continuous-hrtf-measurement-in-near-field-2303.08379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementing-continuous-hrtf-measurement-in-near-field-2303.08379"/></url>
<url><loc>https://scifaro.com/en/abs/a-momentum-two-gradient-direction-algorithm-with-variable-step-size-applied-to-solve-practical-output-constraint-issue-for-active-noise-control-2303.08397</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-momentum-two-gradient-direction-algorithm-with-variable-step-size-applied-to-solve-practical-output-constraint-issue-for-active-noise-control-2303.08397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-momentum-two-gradient-direction-algorithm-with-variable-step-size-applied-to-solve-practical-output-constraint-issue-for-active-noise-control-2303.08397"/></url>
<url><loc>https://scifaro.com/en/abs/a-practical-distributed-active-noise-control-algorithm-overcoming-communication-restrictions-2303.08411</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-practical-distributed-active-noise-control-algorithm-overcoming-communication-restrictions-2303.08411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-practical-distributed-active-noise-control-algorithm-overcoming-communication-restrictions-2303.08411"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-source-localization-in-the-spherical-harmonics-domain-exploiting-low-rank-approximations-2303.08480</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-source-localization-in-the-spherical-harmonics-domain-exploiting-low-rank-approximations-2303.08480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-source-localization-in-the-spherical-harmonics-domain-exploiting-low-rank-approximations-2303.08480"/></url>
<url><loc>https://scifaro.com/en/abs/hybridformer-improving-squeezeformer-with-hybrid-attention-and-nsr-mechanism-2303.08636</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybridformer-improving-squeezeformer-with-hybrid-attention-and-nsr-mechanism-2303.08636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybridformer-improving-squeezeformer-with-hybrid-attention-and-nsr-mechanism-2303.08636"/></url>
<url><loc>https://scifaro.com/en/abs/speech-signal-improvement-using-causal-generative-diffusion-models-2303.08674</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-signal-improvement-using-causal-generative-diffusion-models-2303.08674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-signal-improvement-using-causal-generative-diffusion-models-2303.08674"/></url>
<url><loc>https://scifaro.com/en/abs/beamformer-guided-target-speaker-extraction-2303.08702</loc><lastmod>2023-03-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beamformer-guided-target-speaker-extraction-2303.08702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beamformer-guided-target-speaker-extraction-2303.08702"/></url>
<url><loc>https://scifaro.com/en/abs/subspace-hybrid-beamforming-for-head-worn-microphone-arrays-2303.08967</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspace-hybrid-beamforming-for-head-worn-microphone-arrays-2303.08967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspace-hybrid-beamforming-for-head-worn-microphone-arrays-2303.08967"/></url>
<url><loc>https://scifaro.com/en/abs/triaan-vc-triple-adaptive-attention-normalization-for-any-to-any-voice-conversion-2303.09057</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/triaan-vc-triple-adaptive-attention-normalization-for-any-to-any-voice-conversion-2303.09057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/triaan-vc-triple-adaptive-attention-normalization-for-any-to-any-voice-conversion-2303.09057"/></url>
<url><loc>https://scifaro.com/en/abs/distillw2v2-a-small-and-streaming-wav2vec-2-0-based-asr-model-2303.09278</loc><lastmod>2023-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distillw2v2-a-small-and-streaming-wav2vec-2-0-based-asr-model-2303.09278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distillw2v2-a-small-and-streaming-wav2vec-2-0-based-asr-model-2303.09278"/></url>
<url><loc>https://scifaro.com/en/abs/speech-modeling-with-a-hierarchical-transformer-dynamical-vae-2303.09404</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-modeling-with-a-hierarchical-transformer-dynamical-vae-2303.09404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-modeling-with-a-hierarchical-transformer-dynamical-vae-2303.09404"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-prosody-generation-with-partial-inputs-2303.09446</loc><lastmod>2024-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-prosody-generation-with-partial-inputs-2303.09446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-prosody-generation-with-partial-inputs-2303.09446"/></url>
<url><loc>https://scifaro.com/en/abs/magnitude-corrected-and-time-aligned-interpolation-of-head-related-transfer-functions-2303.09966</loc><lastmod>2023-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/magnitude-corrected-and-time-aligned-interpolation-of-head-related-transfer-functions-2303.09966"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/magnitude-corrected-and-time-aligned-interpolation-of-head-related-transfer-functions-2303.09966"/></url>
<url><loc>https://scifaro.com/en/abs/configurable-eben-extreme-bandwidth-extension-network-to-enhance-body-conducted-speech-capture-2303.10008</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/configurable-eben-extreme-bandwidth-extension-network-to-enhance-body-conducted-speech-capture-2303.10008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/configurable-eben-extreme-bandwidth-extension-network-to-enhance-body-conducted-speech-capture-2303.10008"/></url>
<url><loc>https://scifaro.com/en/abs/visual-information-matters-for-asr-error-correction-2303.10160</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-information-matters-for-asr-error-correction-2303.10160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-information-matters-for-asr-error-correction-2303.10160"/></url>
<url><loc>https://scifaro.com/en/abs/powerful-and-extensible-wfst-framework-for-rnn-transducer-losses-2303.10384</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/powerful-and-extensible-wfst-framework-for-rnn-transducer-losses-2303.10384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/powerful-and-extensible-wfst-framework-for-rnn-transducer-losses-2303.10384"/></url>
<url><loc>https://scifaro.com/en/abs/the-graph-feature-fusion-technique-for-speaker-recognition-based-on-wav2vec2-0-framework-2303.10556</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-graph-feature-fusion-technique-for-speaker-recognition-based-on-wav2vec2-0-framework-2303.10556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-graph-feature-fusion-technique-for-speaker-recognition-based-on-wav2vec2-0-framework-2303.10556"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-from-multiple-foundation-models-for-end-to-end-speech-recognition-2303.10917</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-from-multiple-foundation-models-for-end-to-end-speech-recognition-2303.10917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-from-multiple-foundation-models-for-end-to-end-speech-recognition-2303.10917"/></url>
<url><loc>https://scifaro.com/en/abs/code-switching-text-generation-and-injection-in-mandarin-english-asr-2303.10949</loc><lastmod>2023-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/code-switching-text-generation-and-injection-in-mandarin-english-asr-2303.10949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/code-switching-text-generation-and-injection-in-mandarin-english-asr-2303.10949"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-integration-of-speech-separation-and-voice-activity-detection-for-low-latency-diarization-of-telephone-conversations-2303.12002</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-separation-and-voice-activity-detection-for-low-latency-diarization-of-telephone-conversations-2303.12002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-integration-of-speech-separation-and-voice-activity-detection-for-low-latency-diarization-of-telephone-conversations-2303.12002"/></url>
<url><loc>https://scifaro.com/en/abs/practice-of-the-conformer-enhanced-audio-visual-hubert-on-mandarin-and-english-2303.12187</loc><lastmod>2023-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/practice-of-the-conformer-enhanced-audio-visual-hubert-on-mandarin-and-english-2303.12187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/practice-of-the-conformer-enhanced-audio-visual-hubert-on-mandarin-and-english-2303.12187"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-representations-for-singing-voice-conversion-2303.12197</loc><lastmod>2023-03-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-representations-for-singing-voice-conversion-2303.12197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-representations-for-singing-voice-conversion-2303.12197"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-with-speech-modulation-dropout-2303.12908</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-with-speech-modulation-dropout-2303.12908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-with-speech-modulation-dropout-2303.12908"/></url>
<url><loc>https://scifaro.com/en/abs/weighted-pressure-and-mode-matching-for-sound-field-reproduction-theoretical-and-experimental-comparisons-2303.13027</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weighted-pressure-and-mode-matching-for-sound-field-reproduction-theoretical-and-experimental-comparisons-2303.13027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weighted-pressure-and-mode-matching-for-sound-field-reproduction-theoretical-and-experimental-comparisons-2303.13027"/></url>
<url><loc>https://scifaro.com/en/abs/pyramid-multi-branch-fusion-dcnn-with-multi-head-self-attention-for-mandarin-speech-recognition-2303.13243</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pyramid-multi-branch-fusion-dcnn-with-multi-head-self-attention-for-mandarin-speech-recognition-2303.13243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pyramid-multi-branch-fusion-dcnn-with-multi-head-self-attention-for-mandarin-speech-recognition-2303.13243"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-endpointing-with-deep-contextual-multi-armed-bandits-2303.13407</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-endpointing-with-deep-contextual-multi-armed-bandits-2303.13407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-endpointing-with-deep-contextual-multi-armed-bandits-2303.13407"/></url>
<url><loc>https://scifaro.com/en/abs/better-together-dialogue-separation-and-voice-activity-detection-for-audio-personalization-in-tv-2303.13453</loc><lastmod>2023-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/better-together-dialogue-separation-and-voice-activity-detection-for-audio-personalization-in-tv-2303.13453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/better-together-dialogue-separation-and-voice-activity-detection-for-audio-personalization-in-tv-2303.13453"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-speech-enhancement-using-human-quality-perception-modelling-2303.13685</loc><lastmod>2023-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-speech-enhancement-using-human-quality-perception-modelling-2303.13685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-speech-enhancement-using-human-quality-perception-modelling-2303.13685"/></url>
<url><loc>https://scifaro.com/en/abs/text-is-all-you-need-personalizing-asr-models-using-controllable-speech-synthesis-2303.14885</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-is-all-you-need-personalizing-asr-models-using-controllable-speech-synthesis-2303.14885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-is-all-you-need-personalizing-asr-models-using-controllable-speech-synthesis-2303.14885"/></url>
<url><loc>https://scifaro.com/en/abs/partially-adaptive-multichannel-joint-reduction-of-ego-noise-and-environmental-noise-2303.15042</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partially-adaptive-multichannel-joint-reduction-of-ego-noise-and-environmental-noise-2303.15042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partially-adaptive-multichannel-joint-reduction-of-ego-noise-and-environmental-noise-2303.15042"/></url>
<url><loc>https://scifaro.com/en/abs/cross-utterance-asr-rescoring-with-graph-based-label-propagation-2303.15132</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-utterance-asr-rescoring-with-graph-based-label-propagation-2303.15132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-utterance-asr-rescoring-with-graph-based-label-propagation-2303.15132"/></url>
<url><loc>https://scifaro.com/en/abs/a-deliberation-based-joint-acoustic-and-text-decoder-2303.15293</loc><lastmod>2023-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deliberation-based-joint-acoustic-and-text-decoder-2303.15293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deliberation-based-joint-acoustic-and-text-decoder-2303.15293"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-pre-training-for-data-efficient-text-to-speech-on-low-resource-languages-2303.15669</loc><lastmod>2023-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-pre-training-for-data-efficient-text-to-speech-on-low-resource-languages-2303.15669"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-pre-training-for-data-efficient-text-to-speech-on-low-resource-languages-2303.15669"/></url>
<url><loc>https://scifaro.com/en/abs/ad-yolo-you-look-only-once-in-training-multiple-sound-event-localization-and-detection-2303.15703</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ad-yolo-you-look-only-once-in-training-multiple-sound-event-localization-and-detection-2303.15703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ad-yolo-you-look-only-once-in-training-multiple-sound-event-localization-and-detection-2303.15703"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-active-noise-control-method-based-on-sound-field-interpolation-from-reference-microphone-signals-2303.16021</loc><lastmod>2023-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-active-noise-control-method-based-on-sound-field-interpolation-from-reference-microphone-signals-2303.16021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-active-noise-control-method-based-on-sound-field-interpolation-from-reference-microphone-signals-2303.16021"/></url>
<url><loc>https://scifaro.com/en/abs/kernel-interpolation-based-spatial-active-noise-control-with-exterior-radiation-suppression-2303.16389</loc><lastmod>2023-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kernel-interpolation-based-spatial-active-noise-control-with-exterior-radiation-suppression-2303.16389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kernel-interpolation-based-spatial-active-noise-control-with-exterior-radiation-suppression-2303.16389"/></url>
<url><loc>https://scifaro.com/en/abs/joint-unsupervised-and-supervised-learning-for-context-aware-language-identification-2303.16511</loc><lastmod>2023-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-unsupervised-and-supervised-learning-for-context-aware-language-identification-2303.16511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-unsupervised-and-supervised-learning-for-context-aware-language-identification-2303.16511"/></url>
<url><loc>https://scifaro.com/en/abs/procter-pronunciation-aware-contextual-adapter-for-personalized-speech-recognition-in-neural-transducers-2303.17131</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/procter-pronunciation-aware-contextual-adapter-for-personalized-speech-recognition-in-neural-transducers-2303.17131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/procter-pronunciation-aware-contextual-adapter-for-personalized-speech-recognition-in-neural-transducers-2303.17131"/></url>
<url><loc>https://scifaro.com/en/abs/wavcaps-a-chatgpt-assisted-weakly-labelled-audio-captioning-dataset-for-audio-language-multimodal-research-2303.17395</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavcaps-a-chatgpt-assisted-weakly-labelled-audio-captioning-dataset-for-audio-language-multimodal-research-2303.17395"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavcaps-a-chatgpt-assisted-weakly-labelled-audio-captioning-dataset-for-audio-language-multimodal-research-2303.17395"/></url>
<url><loc>https://scifaro.com/en/abs/prefix-tuning-for-automated-audio-captioning-2303.17489</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prefix-tuning-for-automated-audio-captioning-2303.17489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prefix-tuning-for-automated-audio-captioning-2303.17489"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-noise-reduction-methods-for-sentence-recognition-by-sinhala-speaking-listeners-2303.17829</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-noise-reduction-methods-for-sentence-recognition-by-sinhala-speaking-listeners-2303.17829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-noise-reduction-methods-for-sentence-recognition-by-sinhala-speaking-listeners-2303.17829"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-hankel-matrix-rank-minimization-for-audio-inpainting-2303.18023</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-hankel-matrix-rank-minimization-for-audio-inpainting-2303.18023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-hankel-matrix-rank-minimization-for-audio-inpainting-2303.18023"/></url>
<url><loc>https://scifaro.com/en/abs/improving-meeting-inclusiveness-using-speech-interruption-analysis-2304.00658</loc><lastmod>2023-04-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-meeting-inclusiveness-using-speech-interruption-analysis-2304.00658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-meeting-inclusiveness-using-speech-interruption-analysis-2304.00658"/></url>
<url><loc>https://scifaro.com/en/abs/ensemble-prosody-prediction-for-expressive-speech-synthesis-2304.00714</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensemble-prosody-prediction-for-expressive-speech-synthesis-2304.00714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensemble-prosody-prediction-for-expressive-speech-synthesis-2304.00714"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-based-source-separation-for-meeting-data-2304.00871</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-based-source-separation-for-meeting-data-2304.00871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-based-source-separation-for-meeting-data-2304.00871"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-word-segmentation-using-temporal-gradient-pseudo-labels-2304.00993</loc><lastmod>2023-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-word-segmentation-using-temporal-gradient-pseudo-labels-2304.00993"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-word-segmentation-using-temporal-gradient-pseudo-labels-2304.00993"/></url>
<url><loc>https://scifaro.com/en/abs/torchaudio-squim-reference-less-speech-quality-and-intelligibility-measures-in-torchaudio-2304.01448</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/torchaudio-squim-reference-less-speech-quality-and-intelligibility-measures-in-torchaudio-2304.01448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/torchaudio-squim-reference-less-speech-quality-and-intelligibility-measures-in-torchaudio-2304.01448"/></url>
<url><loc>https://scifaro.com/en/abs/independent-vector-extraction-constrained-on-manifold-of-half-length-filters-2304.01778</loc><lastmod>2023-04-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-vector-extraction-constrained-on-manifold-of-half-length-filters-2304.01778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-vector-extraction-constrained-on-manifold-of-half-length-filters-2304.01778"/></url>
<url><loc>https://scifaro.com/en/abs/margin-mixup-a-method-for-robust-speaker-verification-in-multi-speaker-audio-2304.03515</loc><lastmod>2023-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/margin-mixup-a-method-for-robust-speaker-verification-in-multi-speaker-audio-2304.03515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/margin-mixup-a-method-for-robust-speaker-verification-in-multi-speaker-audio-2304.03515"/></url>
<url><loc>https://scifaro.com/en/abs/an-unsupervised-segmentation-of-vocal-breath-sounds-2304.03758</loc><lastmod>2023-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-unsupervised-segmentation-of-vocal-breath-sounds-2304.03758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-unsupervised-segmentation-of-vocal-breath-sounds-2304.03758"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-phrase-break-prediction-in-an-end-to-end-tts-system-2304.04157</loc><lastmod>2025-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-phrase-break-prediction-in-an-end-to-end-tts-system-2304.04157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-phrase-break-prediction-in-an-end-to-end-tts-system-2304.04157"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-neural-representations-for-audio-manipulation-2304.04394</loc><lastmod>2023-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-neural-representations-for-audio-manipulation-2304.04394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-neural-representations-for-audio-manipulation-2304.04394"/></url>
<url><loc>https://scifaro.com/en/abs/wav2code-restore-clean-speech-representations-via-codebook-lookup-for-noise-robust-asr-2304.04974</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2code-restore-clean-speech-representations-via-codebook-lookup-for-noise-robust-asr-2304.04974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2code-restore-clean-speech-representations-via-codebook-lookup-for-noise-robust-asr-2304.04974"/></url>
<url><loc>https://scifaro.com/en/abs/audio-bank-a-high-level-acoustic-signal-representation-for-audio-event-recognition-2304.05067</loc><lastmod>2023-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-bank-a-high-level-acoustic-signal-representation-for-audio-event-recognition-2304.05067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-bank-a-high-level-acoustic-signal-representation-for-audio-event-recognition-2304.05067"/></url>
<url><loc>https://scifaro.com/en/abs/speech-reconstruction-from-silent-tongue-and-lip-articulation-by-pseudo-target-generation-and-domain-adversarial-training-2304.05574</loc><lastmod>2023-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-reconstruction-from-silent-tongue-and-lip-articulation-by-pseudo-target-generation-and-domain-adversarial-training-2304.05574"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-reconstruction-from-silent-tongue-and-lip-articulation-by-pseudo-target-generation-and-domain-adversarial-training-2304.05574"/></url>
<url><loc>https://scifaro.com/en/abs/filler-word-detection-with-hard-category-mining-and-inter-category-focal-loss-2304.05922</loc><lastmod>2023-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/filler-word-detection-with-hard-category-mining-and-inter-category-focal-loss-2304.05922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/filler-word-detection-with-hard-category-mining-and-inter-category-focal-loss-2304.05922"/></url>
<url><loc>https://scifaro.com/en/abs/regularizing-contrastive-predictive-coding-for-speech-applications-2304.05974</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regularizing-contrastive-predictive-coding-for-speech-applications-2304.05974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regularizing-contrastive-predictive-coding-for-speech-applications-2304.05974"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-absement-in-detail-quantifying-acoustic-differences-across-time-series-representations-of-speech-data-2304.06183</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-absement-in-detail-quantifying-acoustic-differences-across-time-series-representations-of-speech-data-2304.06183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-absement-in-detail-quantifying-acoustic-differences-across-time-series-representations-of-speech-data-2304.06183"/></url>
<url><loc>https://scifaro.com/en/abs/the-future-of-hearing-aid-technology-2304.06786</loc><lastmod>2023-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-future-of-hearing-aid-technology-2304.06786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-future-of-hearing-aid-technology-2304.06786"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-sequence-transduction-by-jointly-predicting-tokens-and-durations-2304.06795</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-sequence-transduction-by-jointly-predicting-tokens-and-durations-2304.06795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-sequence-transduction-by-jointly-predicting-tokens-and-durations-2304.06795"/></url>
<url><loc>https://scifaro.com/en/abs/hcam-hierarchical-cross-attention-model-for-multi-modal-emotion-recognition-2304.06910</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hcam-hierarchical-cross-attention-model-for-multi-modal-emotion-recognition-2304.06910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hcam-hierarchical-cross-attention-model-for-multi-modal-emotion-recognition-2304.06910"/></url>
<url><loc>https://scifaro.com/en/abs/1-d-residual-convolutional-neural-network-coupled-with-data-augmentation-and-regularization-for-the-icphm-2023-data-challenge-2304.07305</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1-d-residual-convolutional-neural-network-coupled-with-data-augmentation-and-regularization-for-the-icphm-2023-data-challenge-2304.07305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1-d-residual-convolutional-neural-network-coupled-with-data-augmentation-and-regularization-for-the-icphm-2023-data-challenge-2304.07305"/></url>
<url><loc>https://scifaro.com/en/abs/airborne-sound-analysis-for-the-detection-of-bearing-faults-in-railway-vehicles-with-real-world-data-2304.07307</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/airborne-sound-analysis-for-the-detection-of-bearing-faults-in-railway-vehicles-with-real-world-data-2304.07307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/airborne-sound-analysis-for-the-detection-of-bearing-faults-in-railway-vehicles-with-real-world-data-2304.07307"/></url>
<url><loc>https://scifaro.com/en/abs/soft-label-coding-for-end-to-end-sound-source-localization-with-ad-hoc-microphone-arrays-2304.07512</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soft-label-coding-for-end-to-end-sound-source-localization-with-ad-hoc-microphone-arrays-2304.07512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soft-label-coding-for-end-to-end-sound-source-localization-with-ad-hoc-microphone-arrays-2304.07512"/></url>
<url><loc>https://scifaro.com/en/abs/a-virtual-simulation-pilot-agent-for-training-of-air-traffic-controllers-2304.07842</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-virtual-simulation-pilot-agent-for-training-of-air-traffic-controllers-2304.07842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-virtual-simulation-pilot-agent-for-training-of-air-traffic-controllers-2304.07842"/></url>
<url><loc>https://scifaro.com/en/abs/audio-coding-with-unified-noise-shaping-and-phase-contrast-control-2304.08076</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-coding-with-unified-noise-shaping-and-phase-contrast-control-2304.08076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-coding-with-unified-noise-shaping-and-phase-contrast-control-2304.08076"/></url>
<url><loc>https://scifaro.com/en/abs/neural-tts-in-french-comparing-graphemic-and-phonetic-inputs-using-the-synpaflex-corpus-and-tacotron2-2304.08209</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-tts-in-french-comparing-graphemic-and-phonetic-inputs-using-the-synpaflex-corpus-and-tacotron2-2304.08209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-tts-in-french-comparing-graphemic-and-phonetic-inputs-using-the-synpaflex-corpus-and-tacotron2-2304.08209"/></url>
<url><loc>https://scifaro.com/en/abs/novel-features-for-the-detection-of-bearing-faults-in-railway-vehicles-2304.08249</loc><lastmod>2023-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/novel-features-for-the-detection-of-bearing-faults-in-railway-vehicles-2304.08249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/novel-features-for-the-detection-of-bearing-faults-in-railway-vehicles-2304.08249"/></url>
<url><loc>https://scifaro.com/en/abs/how-tiny-can-analog-filterbank-features-be-made-for-ultra-low-power-on-device-keyword-spotting-2304.08541</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-tiny-can-analog-filterbank-features-be-made-for-ultra-low-power-on-device-keyword-spotting-2304.08541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-tiny-can-analog-filterbank-features-be-made-for-ultra-low-power-on-device-keyword-spotting-2304.08541"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-enhancement-with-very-low-algorithmic-latency-and-complexity-via-integrated-full-and-sub-band-modeling-2304.08707</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-enhancement-with-very-low-algorithmic-latency-and-complexity-via-integrated-full-and-sub-band-modeling-2304.08707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-enhancement-with-very-low-algorithmic-latency-and-complexity-via-integrated-full-and-sub-band-modeling-2304.08707"/></url>
<url><loc>https://scifaro.com/en/abs/array-configuration-agnostic-personal-voice-activity-detection-based-on-spatial-coherence-2304.08887</loc><lastmod>2023-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/array-configuration-agnostic-personal-voice-activity-detection-based-on-spatial-coherence-2304.08887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/array-configuration-agnostic-personal-voice-activity-detection-based-on-spatial-coherence-2304.08887"/></url>
<url><loc>https://scifaro.com/en/abs/naturalspeech-2-latent-diffusion-models-are-natural-and-zero-shot-speech-and-singing-synthesizers-2304.09116</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturalspeech-2-latent-diffusion-models-are-natural-and-zero-shot-speech-and-singing-synthesizers-2304.09116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturalspeech-2-latent-diffusion-models-are-natural-and-zero-shot-speech-and-singing-synthesizers-2304.09116"/></url>
<url><loc>https://scifaro.com/en/abs/coded-speech-quality-measurement-by-a-non-intrusive-pesq-dnn-2304.09226</loc><lastmod>2023-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coded-speech-quality-measurement-by-a-non-intrusive-pesq-dnn-2304.09226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coded-speech-quality-measurement-by-a-non-intrusive-pesq-dnn-2304.09226"/></url>
<url><loc>https://scifaro.com/en/abs/aircade-an-anechoic-and-ir-convolution-based-auralization-data-compilation-ensemble-2304.09318</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aircade-an-anechoic-and-ir-convolution-based-auralization-data-compilation-ensemble-2304.09318"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aircade-an-anechoic-and-ir-convolution-based-auralization-data-compilation-ensemble-2304.09318"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-chunk-convolution-for-unified-streaming-and-non-streaming-conformer-asr-2304.09325</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-chunk-convolution-for-unified-streaming-and-non-streaming-conformer-asr-2304.09325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-chunk-convolution-for-unified-streaming-and-non-streaming-conformer-asr-2304.09325"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-query-by-example-keyword-spotting-with-metric-learning-and-phoneme-to-embedding-mapping-2304.09585</loc><lastmod>2023-04-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-query-by-example-keyword-spotting-with-metric-learning-and-phoneme-to-embedding-mapping-2304.09585"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-query-by-example-keyword-spotting-with-metric-learning-and-phoneme-to-embedding-mapping-2304.09585"/></url>
<url><loc>https://scifaro.com/en/abs/towards-the-universal-defense-for-query-based-audio-adversarial-attacks-2304.10088</loc><lastmod>2023-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-the-universal-defense-for-query-based-audio-adversarial-attacks-2304.10088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-the-universal-defense-for-query-based-audio-adversarial-attacks-2304.10088"/></url>
<url><loc>https://scifaro.com/en/abs/non-autoregressive-end-to-end-approaches-for-joint-automatic-speech-recognition-and-spoken-language-understanding-2304.10869</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-autoregressive-end-to-end-approaches-for-joint-automatic-speech-recognition-and-spoken-language-understanding-2304.10869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-autoregressive-end-to-end-approaches-for-joint-automatic-speech-recognition-and-spoken-language-understanding-2304.10869"/></url>
<url><loc>https://scifaro.com/en/abs/heart-rate-extraction-from-abdominal-audio-signals-2304.11020</loc><lastmod>2023-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heart-rate-extraction-from-abdominal-audio-signals-2304.11020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heart-rate-extraction-from-abdominal-audio-signals-2304.11020"/></url>
<url><loc>https://scifaro.com/en/abs/olisia-a-cascade-system-for-spoken-dialogue-state-tracking-2304.11073</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/olisia-a-cascade-system-for-spoken-dialogue-state-tracking-2304.11073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/olisia-a-cascade-system-for-spoken-dialogue-state-tracking-2304.11073"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-pre-trained-speech-and-audio-embeddings-for-speech-emotion-recognition-2304.11472</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-pre-trained-speech-and-audio-embeddings-for-speech-emotion-recognition-2304.11472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-pre-trained-speech-and-audio-embeddings-for-speech-emotion-recognition-2304.11472"/></url>
<url><loc>https://scifaro.com/en/abs/towards-controllable-audio-texture-morphing-2304.11648</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-controllable-audio-texture-morphing-2304.11648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-controllable-audio-texture-morphing-2304.11648"/></url>
<url><loc>https://scifaro.com/en/abs/diffvoice-text-to-speech-with-latent-diffusion-2304.11750</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffvoice-text-to-speech-with-latent-diffusion-2304.11750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffvoice-text-to-speech-with-latent-diffusion-2304.11750"/></url>
<url><loc>https://scifaro.com/en/abs/self-regularised-minimum-latency-training-for-streaming-transformer-based-speech-recognition-2304.11985</loc><lastmod>2023-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-regularised-minimum-latency-training-for-streaming-transformer-based-speech-recognition-2304.11985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-regularised-minimum-latency-training-for-streaming-transformer-based-speech-recognition-2304.11985"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-speech-separation-using-spatially-selective-deep-non-linear-filters-2304.12023</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-speech-separation-using-spatially-selective-deep-non-linear-filters-2304.12023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-speech-separation-using-spatially-selective-deep-non-linear-filters-2304.12023"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-translation-accuracy-and-time-efficiency-with-fine-tuned-wav2vec-2-0-based-speech-segmentation-2304.12659</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-translation-accuracy-and-time-efficiency-with-fine-tuned-wav2vec-2-0-based-speech-segmentation-2304.12659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-translation-accuracy-and-time-efficiency-with-fine-tuned-wav2vec-2-0-based-speech-segmentation-2304.12659"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-audio-tagging-assisted-sound-event-detection-using-weakified-strong-labels-and-frequency-dynamic-convolutions-2304.12688</loc><lastmod>2023-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-audio-tagging-assisted-sound-event-detection-using-weakified-strong-labels-and-frequency-dynamic-convolutions-2304.12688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-audio-tagging-assisted-sound-event-detection-using-weakified-strong-labels-and-frequency-dynamic-convolutions-2304.12688"/></url>
<url><loc>https://scifaro.com/en/abs/source-filter-based-generative-adversarial-neural-vocoder-for-high-fidelity-speech-synthesis-2304.13270</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-filter-based-generative-adversarial-neural-vocoder-for-high-fidelity-speech-synthesis-2304.13270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-filter-based-generative-adversarial-neural-vocoder-for-high-fidelity-speech-synthesis-2304.13270"/></url>
<url><loc>https://scifaro.com/en/abs/all-information-is-necessary-integrating-speech-positive-and-negative-information-by-contrastive-learning-for-speech-enhancement-2304.13439</loc><lastmod>2023-04-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-information-is-necessary-integrating-speech-positive-and-negative-information-by-contrastive-learning-for-speech-enhancement-2304.13439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-information-is-necessary-integrating-speech-positive-and-negative-information-by-contrastive-learning-for-speech-enhancement-2304.13439"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-audio-generation-using-instruction-tuned-llm-and-latent-diffusion-model-2304.13731</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-audio-generation-using-instruction-tuned-llm-and-latent-diffusion-model-2304.13731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-audio-generation-using-instruction-tuned-llm-and-latent-diffusion-model-2304.13731"/></url>
<url><loc>https://scifaro.com/en/abs/towards-better-domain-adaptation-for-self-supervised-models-a-case-study-of-child-asr-2305.00115</loc><lastmod>2023-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-better-domain-adaptation-for-self-supervised-models-a-case-study-of-child-asr-2305.00115"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-better-domain-adaptation-for-self-supervised-models-a-case-study-of-child-asr-2305.00115"/></url>
<url><loc>https://scifaro.com/en/abs/a-review-of-deep-learning-techniques-for-speech-processing-2305.00359</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-review-of-deep-learning-techniques-for-speech-processing-2305.00359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-review-of-deep-learning-techniques-for-speech-processing-2305.00359"/></url>
<url><loc>https://scifaro.com/en/abs/lessons-learned-in-atco2-5000-hours-of-air-traffic-control-communications-for-robust-automatic-speech-recognition-and-understanding-2305.01155</loc><lastmod>2023-05-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lessons-learned-in-atco2-5000-hours-of-air-traffic-control-communications-for-robust-automatic-speech-recognition-and-understanding-2305.01155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lessons-learned-in-atco2-5000-hours-of-air-traffic-control-communications-for-robust-automatic-speech-recognition-and-understanding-2305.01155"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-for-joint-acoustic-echo-and-acoustic-howling-suppression-in-hybrid-meetings-2305.01637</loc><lastmod>2023-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-for-joint-acoustic-echo-and-acoustic-howling-suppression-in-hybrid-meetings-2305.01637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-for-joint-acoustic-echo-and-acoustic-howling-suppression-in-hybrid-meetings-2305.01637"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-generative-nmf-for-single-channel-source-separation-2305.01758</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-generative-nmf-for-single-channel-source-separation-2305.01758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-generative-nmf-for-single-channel-source-separation-2305.01758"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-speaker-anonymization-on-emotional-speech-2305.01759</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-speaker-anonymization-on-emotional-speech-2305.01759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-speaker-anonymization-on-emotional-speech-2305.01759"/></url>
<url><loc>https://scifaro.com/en/abs/sound-in-occupied-open-plan-offices-objective-metrics-with-a-review-of-historical-perspectives-2305.01762</loc><lastmod>2023-05-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-in-occupied-open-plan-offices-objective-metrics-with-a-review-of-historical-perspectives-2305.01762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-in-occupied-open-plan-offices-objective-metrics-with-a-review-of-historical-perspectives-2305.01762"/></url>
<url><loc>https://scifaro.com/en/abs/improved-vocal-effort-transfer-vector-estimation-for-vocal-effort-robust-speaker-verification-2305.02147</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-vocal-effort-transfer-vector-estimation-for-vocal-effort-robust-speaker-verification-2305.02147"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-vocal-effort-transfer-vector-estimation-for-vocal-effort-robust-speaker-verification-2305.02147"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-ahs-a-hybrid-of-kalman-filter-and-deep-learning-for-acoustic-howling-suppression-2305.02583</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-ahs-a-hybrid-of-kalman-filter-and-deep-learning-for-acoustic-howling-suppression-2305.02583"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-ahs-a-hybrid-of-kalman-filter-and-deep-learning-for-acoustic-howling-suppression-2305.02583"/></url>
<url><loc>https://scifaro.com/en/abs/an-acoustic-simulation-framework-to-support-indoor-positioning-and-data-driven-signal-processing-assessments-2305.02715</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-acoustic-simulation-framework-to-support-indoor-positioning-and-data-driven-signal-processing-assessments-2305.02715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-acoustic-simulation-framework-to-support-indoor-positioning-and-data-driven-signal-processing-assessments-2305.02715"/></url>
<url><loc>https://scifaro.com/en/abs/plug-and-play-multilingual-few-shot-spoken-words-recognition-2305.03058</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plug-and-play-multilingual-few-shot-spoken-words-recognition-2305.03058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plug-and-play-multilingual-few-shot-spoken-words-recognition-2305.03058"/></url>
<url><loc>https://scifaro.com/en/abs/employing-hybrid-deep-neural-networks-on-dari-speech-2305.03200</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-hybrid-deep-neural-networks-on-dari-speech-2305.03200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-hybrid-deep-neural-networks-on-dari-speech-2305.03200"/></url>
<url><loc>https://scifaro.com/en/abs/time-weighted-frequency-domain-audio-representation-with-gmm-estimator-for-anomalous-sound-detection-2305.03328</loc><lastmod>2023-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-weighted-frequency-domain-audio-representation-with-gmm-estimator-for-anomalous-sound-detection-2305.03328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-weighted-frequency-domain-audio-representation-with-gmm-estimator-for-anomalous-sound-detection-2305.03328"/></url>
<url><loc>https://scifaro.com/en/abs/blind-identification-of-ambisonic-reduced-room-impulse-response-2305.03558</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-identification-of-ambisonic-reduced-room-impulse-response-2305.03558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-identification-of-ambisonic-reduced-room-impulse-response-2305.03558"/></url>
<url><loc>https://scifaro.com/en/abs/mask-the-bias-improving-domain-adaptive-generalization-of-ctc-based-asr-with-internal-language-model-estimation-2305.03837</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-the-bias-improving-domain-adaptive-generalization-of-ctc-based-asr-with-internal-language-model-estimation-2305.03837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-the-bias-improving-domain-adaptive-generalization-of-ctc-based-asr-with-internal-language-model-estimation-2305.03837"/></url>
<url><loc>https://scifaro.com/en/abs/lookahead-when-it-matters-adaptive-non-causal-transformers-for-streaming-neural-transducers-2305.04159</loc><lastmod>2023-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lookahead-when-it-matters-adaptive-non-causal-transformers-for-streaming-neural-transducers-2305.04159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lookahead-when-it-matters-adaptive-non-causal-transformers-for-streaming-neural-transducers-2305.04159"/></url>
<url><loc>https://scifaro.com/en/abs/neural-steerer-novel-steering-vector-synthesis-with-a-causal-neural-field-over-frequency-and-source-positions-2305.04447</loc><lastmod>2024-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-steerer-novel-steering-vector-synthesis-with-a-causal-neural-field-over-frequency-and-source-positions-2305.04447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-steerer-novel-steering-vector-synthesis-with-a-causal-neural-field-over-frequency-and-source-positions-2305.04447"/></url>
<url><loc>https://scifaro.com/en/abs/alignsts-speech-to-singing-conversion-via-cross-modal-alignment-2305.04476</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignsts-speech-to-singing-conversion-via-cross-modal-alignment-2305.04476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignsts-speech-to-singing-conversion-via-cross-modal-alignment-2305.04476"/></url>
<url><loc>https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-limited-data-2305.04816</loc><lastmod>2023-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-limited-data-2305.04816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accented-text-to-speech-synthesis-with-limited-data-2305.04816"/></url>
<url><loc>https://scifaro.com/en/abs/fast-conformer-with-linearly-scalable-attention-for-efficient-speech-recognition-2305.05084</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-conformer-with-linearly-scalable-attention-for-efficient-speech-recognition-2305.05084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-conformer-with-linearly-scalable-attention-for-efficient-speech-recognition-2305.05084"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-in-speech-technology-2305.05227</loc><lastmod>2025-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-in-speech-technology-2305.05227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-in-speech-technology-2305.05227"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploration-into-the-performance-of-unsupervised-cross-task-speech-representations-for-in-the-wild-edge-applications-2305.05443</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploration-into-the-performance-of-unsupervised-cross-task-speech-representations-for-in-the-wild-edge-applications-2305.05443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploration-into-the-performance-of-unsupervised-cross-task-speech-representations-for-in-the-wild-edge-applications-2305.05443"/></url>
<url><loc>https://scifaro.com/en/abs/accurate-real-time-estimation-of-2-dimensional-direction-of-arrival-using-a-3-microphone-array-2305.05630</loc><lastmod>2023-05-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accurate-real-time-estimation-of-2-dimensional-direction-of-arrival-using-a-3-microphone-array-2305.05630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accurate-real-time-estimation-of-2-dimensional-direction-of-arrival-using-a-3-microphone-array-2305.05630"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-signal-refiner-for-speech-enhancement-and-separation-2305.05857</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-signal-refiner-for-speech-enhancement-and-separation-2305.05857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-signal-refiner-for-speech-enhancement-and-separation-2305.05857"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-diaphragm-excursion-prediction-deep-attention-and-online-adaptation-2305.06640</loc><lastmod>2023-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-diaphragm-excursion-prediction-deep-attention-and-online-adaptation-2305.06640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-diaphragm-excursion-prediction-deep-attention-and-online-adaptation-2305.06640"/></url>
<url><loc>https://scifaro.com/en/abs/quran-recitation-recognition-using-end-to-end-deep-learning-2305.07034</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quran-recitation-recognition-using-end-to-end-deep-learning-2305.07034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quran-recitation-recognition-using-end-to-end-deep-learning-2305.07034"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-temporal-channel-speaker-retrieval-for-zero-shot-voice-conversion-2305.07204</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-temporal-channel-speaker-retrieval-for-zero-shot-voice-conversion-2305.07204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-temporal-channel-speaker-retrieval-for-zero-shot-voice-conversion-2305.07204"/></url>
<url><loc>https://scifaro.com/en/abs/qvoice-arabic-speech-pronunciation-learning-application-2305.07445</loc><lastmod>2023-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qvoice-arabic-speech-pronunciation-learning-application-2305.07445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qvoice-arabic-speech-pronunciation-learning-application-2305.07445"/></url>
<url><loc>https://scifaro.com/en/abs/the-whole-is-greater-than-the-sum-of-its-parts-improving-music-source-separation-by-bridging-network-2305.07855</loc><lastmod>2024-08-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-whole-is-greater-than-the-sum-of-its-parts-improving-music-source-separation-by-bridging-network-2305.07855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-whole-is-greater-than-the-sum-of-its-parts-improving-music-source-separation-by-bridging-network-2305.07855"/></url>
<url><loc>https://scifaro.com/en/abs/vocal-style-factorization-for-effective-speaker-recognition-in-affective-scenarios-2305.07997</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocal-style-factorization-for-effective-speaker-recognition-in-affective-scenarios-2305.07997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocal-style-factorization-for-effective-speaker-recognition-in-affective-scenarios-2305.07997"/></url>
<url><loc>https://scifaro.com/en/abs/deep-multi-frame-filtering-for-hearing-aids-2305.08225</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-multi-frame-filtering-for-hearing-aids-2305.08225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-multi-frame-filtering-for-hearing-aids-2305.08225"/></url>
<url><loc>https://scifaro.com/en/abs/deepfilternet-perceptually-motivated-real-time-speech-enhancement-2305.08227</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deepfilternet-perceptually-motivated-real-time-speech-enhancement-2305.08227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deepfilternet-perceptually-motivated-real-time-speech-enhancement-2305.08227"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-uncertainty-into-neural-network-based-speech-enhancement-2305.08744</loc><lastmod>2023-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-uncertainty-into-neural-network-based-speech-enhancement-2305.08744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-uncertainty-into-neural-network-based-speech-enhancement-2305.08744"/></url>
<url><loc>https://scifaro.com/en/abs/tg-critic-a-timbre-guided-model-for-reference-independent-singing-evaluation-2305.09127</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tg-critic-a-timbre-guided-model-for-reference-independent-singing-evaluation-2305.09127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tg-critic-a-timbre-guided-model-for-reference-independent-singing-evaluation-2305.09127"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-global-interaction-and-local-alignment-for-audio-visual-speech-recognition-2305.09212</loc><lastmod>2023-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-global-interaction-and-local-alignment-for-audio-visual-speech-recognition-2305.09212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-global-interaction-and-local-alignment-for-audio-visual-speech-recognition-2305.09212"/></url>
<url><loc>https://scifaro.com/en/abs/continual-learning-for-end-to-end-asr-by-averaging-domain-experts-2305.09681</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-learning-for-end-to-end-asr-by-averaging-domain-experts-2305.09681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-learning-for-end-to-end-asr-by-averaging-domain-experts-2305.09681"/></url>
<url><loc>https://scifaro.com/en/abs/ood-speech-a-large-bengali-speech-recognition-dataset-for-out-of-distribution-benchmarking-2305.09688</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ood-speech-a-large-bengali-speech-recognition-dataset-for-out-of-distribution-benchmarking-2305.09688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ood-speech-a-large-bengali-speech-recognition-dataset-for-out-of-distribution-benchmarking-2305.09688"/></url>
<url><loc>https://scifaro.com/en/abs/basen-time-domain-brain-assisted-speech-enhancement-network-with-convolutional-cross-attention-in-multi-talker-conditions-2305.09994</loc><lastmod>2023-05-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/basen-time-domain-brain-assisted-speech-enhancement-network-with-convolutional-cross-attention-in-multi-talker-conditions-2305.09994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/basen-time-domain-brain-assisted-speech-enhancement-network-with-convolutional-cross-attention-in-multi-talker-conditions-2305.09994"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speaker-verification-with-self-pretrained-transformer-models-2305.10517</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speaker-verification-with-self-pretrained-transformer-models-2305.10517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speaker-verification-with-self-pretrained-transformer-models-2305.10517"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-range-equalizing-bias-in-mean-opinion-score-ratings-of-synthesized-speech-2305.10608</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-range-equalizing-bias-in-mean-opinion-score-ratings-of-synthesized-speech-2305.10608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-range-equalizing-bias-in-mean-opinion-score-ratings-of-synthesized-speech-2305.10608"/></url>
<url><loc>https://scifaro.com/en/abs/use-of-speech-impairment-severity-for-dysarthric-speech-recognition-2305.10659</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/use-of-speech-impairment-severity-for-dysarthric-speech-recognition-2305.10659"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/use-of-speech-impairment-severity-for-dysarthric-speech-recognition-2305.10659"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-for-diverse-voice-conversion-in-noisy-environments-2305.10684</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-for-diverse-voice-conversion-in-noisy-environments-2305.10684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-for-diverse-voice-conversion-in-noisy-environments-2305.10684"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-task-learning-framework-for-sound-event-detection-using-high-level-acoustic-characteristics-of-sounds-2305.10729</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-task-learning-framework-for-sound-event-detection-using-high-level-acoustic-characteristics-of-sounds-2305.10729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-task-learning-framework-for-sound-event-detection-using-high-level-acoustic-characteristics-of-sounds-2305.10729"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-articulation-analysis-using-a-geometric-transformation-of-the-x-ray-microbeam-dataset-2305.10775</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-articulation-analysis-using-a-geometric-transformation-of-the-x-ray-microbeam-dataset-2305.10775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-articulation-analysis-using-a-geometric-transformation-of-the-x-ray-microbeam-dataset-2305.10775"/></url>
<url><loc>https://scifaro.com/en/abs/listen-think-and-understand-2305.10790</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-think-and-understand-2305.10790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-think-and-understand-2305.10790"/></url>
<url><loc>https://scifaro.com/en/abs/tacos-learning-temporally-structured-embeddings-for-few-shot-keyword-spotting-with-dynamic-time-warping-2305.10816</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tacos-learning-temporally-structured-embeddings-for-few-shot-keyword-spotting-with-dynamic-time-warping-2305.10816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tacos-learning-temporally-structured-embeddings-for-few-shot-keyword-spotting-with-dynamic-time-warping-2305.10816"/></url>
<url><loc>https://scifaro.com/en/abs/locate-and-beamform-two-dimensional-locating-all-neural-beamformer-for-multi-channel-speech-separation-2305.10821</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locate-and-beamform-two-dimensional-locating-all-neural-beamformer-for-multi-channel-speech-separation-2305.10821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locate-and-beamform-two-dimensional-locating-all-neural-beamformer-for-multi-channel-speech-separation-2305.10821"/></url>
<url><loc>https://scifaro.com/en/abs/fastfit-towards-real-time-iterative-neural-vocoder-by-replacing-u-net-encoder-with-multiple-stfts-2305.10823</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastfit-towards-real-time-iterative-neural-vocoder-by-replacing-u-net-encoder-with-multiple-stfts-2305.10823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastfit-towards-real-time-iterative-neural-vocoder-by-replacing-u-net-encoder-with-multiple-stfts-2305.10823"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-mel-spectrogram-enhancement-for-personalized-speech-synthesis-with-found-data-2305.10891</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-mel-spectrogram-enhancement-for-personalized-speech-synthesis-with-found-data-2305.10891"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-mel-spectrogram-enhancement-for-personalized-speech-synthesis-with-found-data-2305.10891"/></url>
<url><loc>https://scifaro.com/en/abs/improving-generalization-ability-of-countermeasures-for-new-mismatch-scenario-by-combining-multiple-advanced-regularization-terms-2305.10940</loc><lastmod>2023-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-generalization-ability-of-countermeasures-for-new-mismatch-scenario-by-combining-multiple-advanced-regularization-terms-2305.10940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-generalization-ability-of-countermeasures-for-new-mismatch-scenario-by-combining-multiple-advanced-regularization-terms-2305.10940"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-the-hidden-talent-of-web-scale-speech-models-for-zero-shot-task-generalization-2305.11095</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-the-hidden-talent-of-web-scale-speech-models-for-zero-shot-task-generalization-2305.11095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-the-hidden-talent-of-web-scale-speech-models-for-zero-shot-task-generalization-2305.11095"/></url>
<url><loc>https://scifaro.com/en/abs/mdctgan-taming-transformer-based-gan-for-speech-super-resolution-with-modified-dct-spectra-2305.11104</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mdctgan-taming-transformer-based-gan-for-speech-super-resolution-with-modified-dct-spectra-2305.11104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mdctgan-taming-transformer-based-gan-for-speech-super-resolution-with-modified-dct-spectra-2305.11104"/></url>
<url><loc>https://scifaro.com/en/abs/federated-learning-for-secure-development-of-ai-models-for-parkinson-s-disease-detection-using-speech-from-different-languages-2305.11284</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-learning-for-secure-development-of-ai-models-for-parkinson-s-disease-detection-using-speech-from-different-languages-2305.11284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-learning-for-secure-development-of-ai-models-for-parkinson-s-disease-detection-using-speech-from-different-languages-2305.11284"/></url>
<url><loc>https://scifaro.com/en/abs/are-microphone-signals-alone-sufficient-for-self-positioning-2305.11397</loc><lastmod>2023-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-microphone-signals-alone-sufficient-for-self-positioning-2305.11397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-microphone-signals-alone-sufficient-for-self-positioning-2305.11397"/></url>
<url><loc>https://scifaro.com/en/abs/syllable-discovery-and-cross-lingual-generalization-in-a-visually-grounded-self-supervised-speech-model-2305.11435</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syllable-discovery-and-cross-lingual-generalization-in-a-visually-grounded-self-supervised-speech-model-2305.11435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syllable-discovery-and-cross-lingual-generalization-in-a-visually-grounded-self-supervised-speech-model-2305.11435"/></url>
<url><loc>https://scifaro.com/en/abs/delay-penalized-ctc-implemented-based-on-finite-state-transducer-2305.11539</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/delay-penalized-ctc-implemented-based-on-finite-state-transducer-2305.11539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/delay-penalized-ctc-implemented-based-on-finite-state-transducer-2305.11539"/></url>
<url><loc>https://scifaro.com/en/abs/blank-regularized-ctc-for-frame-skipping-in-neural-transducer-2305.11558</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blank-regularized-ctc-for-frame-skipping-in-neural-transducer-2305.11558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blank-regularized-ctc-for-frame-skipping-in-neural-transducer-2305.11558"/></url>
<url><loc>https://scifaro.com/en/abs/language-universal-phonetic-representation-in-multilingual-speech-pretraining-for-low-resource-speech-recognition-2305.11569</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-universal-phonetic-representation-in-multilingual-speech-pretraining-for-low-resource-speech-recognition-2305.11569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-universal-phonetic-representation-in-multilingual-speech-pretraining-for-low-resource-speech-recognition-2305.11569"/></url>
<url><loc>https://scifaro.com/en/abs/bat-boundary-aware-transducer-for-memory-efficient-and-low-latency-asr-2305.11571</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bat-boundary-aware-transducer-for-memory-efficient-and-low-latency-asr-2305.11571"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bat-boundary-aware-transducer-for-memory-efficient-and-low-latency-asr-2305.11571"/></url>
<url><loc>https://scifaro.com/en/abs/language-universal-phonetic-encoder-for-low-resource-speech-recognition-2305.11576</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-universal-phonetic-encoder-for-low-resource-speech-recognition-2305.11576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-universal-phonetic-encoder-for-low-resource-speech-recognition-2305.11576"/></url>
<url><loc>https://scifaro.com/en/abs/recycle-and-distill-universal-compression-strategy-for-transformer-based-speech-ssl-models-with-attention-map-reusing-and-masking-distillation-2305.11685</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recycle-and-distill-universal-compression-strategy-for-transformer-based-speech-ssl-models-with-attention-map-reusing-and-masking-distillation-2305.11685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recycle-and-distill-universal-compression-strategy-for-transformer-based-speech-ssl-models-with-attention-map-reusing-and-masking-distillation-2305.11685"/></url>
<url><loc>https://scifaro.com/en/abs/pengi-an-audio-language-model-for-audio-tasks-2305.11834</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pengi-an-audio-language-model-for-audio-tasks-2305.11834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pengi-an-audio-language-model-for-audio-tasks-2305.11834"/></url>
<url><loc>https://scifaro.com/en/abs/north-s-a-mi-dialect-identification-with-self-supervised-speech-models-2305.11864</loc><lastmod>2023-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/north-s-a-mi-dialect-identification-with-self-supervised-speech-models-2305.11864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/north-s-a-mi-dialect-identification-with-self-supervised-speech-models-2305.11864"/></url>
<url><loc>https://scifaro.com/en/abs/joint-generative-contrastive-representation-learning-for-anomalous-sound-detection-2305.12111</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-generative-contrastive-representation-learning-for-anomalous-sound-detection-2305.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-generative-contrastive-representation-learning-for-anomalous-sound-detection-2305.12111"/></url>
<url><loc>https://scifaro.com/en/abs/dccrn-kws-an-audio-bias-based-model-for-noise-robust-small-footprint-keyword-spotting-2305.12331</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dccrn-kws-an-audio-bias-based-model-for-noise-robust-small-footprint-keyword-spotting-2305.12331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dccrn-kws-an-audio-bias-based-model-for-noise-robust-small-footprint-keyword-spotting-2305.12331"/></url>
<url><loc>https://scifaro.com/en/abs/dualvc-dual-mode-voice-conversion-using-intra-model-knowledge-distillation-and-hybrid-predictive-coding-2305.12425</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualvc-dual-mode-voice-conversion-using-intra-model-knowledge-distillation-and-hybrid-predictive-coding-2305.12425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualvc-dual-mode-voice-conversion-using-intra-model-knowledge-distillation-and-hybrid-predictive-coding-2305.12425"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-vad-low-latency-voice-activity-detection-for-speech-interaction-2305.12450</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-vad-low-latency-voice-activity-detection-for-speech-interaction-2305.12450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-vad-low-latency-voice-activity-detection-for-speech-interaction-2305.12450"/></url>
<url><loc>https://scifaro.com/en/abs/casa-asr-context-aware-speaker-attributed-asr-2305.12459</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/casa-asr-context-aware-speaker-attributed-asr-2305.12459"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/casa-asr-context-aware-speaker-attributed-asr-2305.12459"/></url>
<url><loc>https://scifaro.com/en/abs/contextualized-end-to-end-speech-recognition-with-contextual-phrase-prediction-network-2305.12493</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextualized-end-to-end-speech-recognition-with-contextual-phrase-prediction-network-2305.12493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextualized-end-to-end-speech-recognition-with-contextual-phrase-prediction-network-2305.12493"/></url>
<url><loc>https://scifaro.com/en/abs/multi-head-state-space-model-for-speech-recognition-2305.12498</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-head-state-space-model-for-speech-recognition-2305.12498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-head-state-space-model-for-speech-recognition-2305.12498"/></url>
<url><loc>https://scifaro.com/en/abs/towards-robust-family-infant-audio-analysis-based-on-unsupervised-pretraining-of-wav2vec-2-0-on-large-scale-unlabeled-family-audio-2305.12530</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-robust-family-infant-audio-analysis-based-on-unsupervised-pretraining-of-wav2vec-2-0-on-large-scale-unlabeled-family-audio-2305.12530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-robust-family-infant-audio-analysis-based-on-unsupervised-pretraining-of-wav2vec-2-0-on-large-scale-unlabeled-family-audio-2305.12530"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-efficacy-and-noise-robustness-of-jointly-learned-speech-emotion-and-automatic-speech-recognition-2305.12540</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-efficacy-and-noise-robustness-of-jointly-learned-speech-emotion-and-automatic-speech-recognition-2305.12540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-efficacy-and-noise-robustness-of-jointly-learned-speech-emotion-and-automatic-speech-recognition-2305.12540"/></url>
<url><loc>https://scifaro.com/en/abs/vit-tts-visual-text-to-speech-with-scalable-diffusion-transformer-2305.12708</loc><lastmod>2024-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vit-tts-visual-text-to-speech-with-scalable-diffusion-transformer-2305.12708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vit-tts-visual-text-to-speech-with-scalable-diffusion-transformer-2305.12708"/></url>
<url><loc>https://scifaro.com/en/abs/coswara-a-respiratory-sounds-and-symptoms-dataset-for-remote-screening-of-sars-cov-2-infection-2305.12741</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coswara-a-respiratory-sounds-and-symptoms-dataset-for-remote-screening-of-sars-cov-2-infection-2305.12741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coswara-a-respiratory-sounds-and-symptoms-dataset-for-remote-screening-of-sars-cov-2-infection-2305.12741"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-end-to-end-spoken-language-understanding-via-cross-modal-selective-self-training-2305.12793</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-end-to-end-spoken-language-understanding-via-cross-modal-selective-self-training-2305.12793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-end-to-end-spoken-language-understanding-via-cross-modal-selective-self-training-2305.12793"/></url>
<url><loc>https://scifaro.com/en/abs/target-active-speaker-detection-with-audio-visual-cues-2305.12831</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-active-speaker-detection-with-audio-visual-cues-2305.12831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-active-speaker-detection-with-audio-visual-cues-2305.12831"/></url>
<url><loc>https://scifaro.com/en/abs/an-enhanced-res2net-with-local-and-global-feature-fusion-for-speaker-verification-2305.12838</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-enhanced-res2net-with-local-and-global-feature-fusion-for-speaker-verification-2305.12838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-enhanced-res2net-with-local-and-global-feature-fusion-for-speaker-verification-2305.12838"/></url>
<url><loc>https://scifaro.com/en/abs/zs-mstm-zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2305.12887</loc><lastmod>2023-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zs-mstm-zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2305.12887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zs-mstm-zero-shot-style-transfer-for-gesture-animation-driven-by-text-and-speech-using-adversarial-disentanglement-of-multimodal-style-encoding-2305.12887"/></url>
<url><loc>https://scifaro.com/en/abs/debiased-automatic-speech-recognition-for-dysarthric-speech-via-sample-reweighting-with-sample-affinity-test-2305.13108</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/debiased-automatic-speech-recognition-for-dysarthric-speech-via-sample-reweighting-with-sample-affinity-test-2305.13108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/debiased-automatic-speech-recognition-for-dysarthric-speech-via-sample-reweighting-with-sample-affinity-test-2305.13108"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-asr-via-cross-lingual-pseudo-labeling-2305.13330</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-asr-via-cross-lingual-pseudo-labeling-2305.13330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-asr-via-cross-lingual-pseudo-labeling-2305.13330"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-benchmark-of-aphasia-speech-recognition-and-detection-based-on-e-branchformer-and-multi-task-learning-2305.13331</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-benchmark-of-aphasia-speech-recognition-and-detection-based-on-e-branchformer-and-multi-task-learning-2305.13331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-benchmark-of-aphasia-speech-recognition-and-detection-based-on-e-branchformer-and-multi-task-learning-2305.13331"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-online-learning-for-keyword-spotting-2305.13332</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-online-learning-for-keyword-spotting-2305.13332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-online-learning-for-keyword-spotting-2305.13332"/></url>
<url><loc>https://scifaro.com/en/abs/modular-domain-adaptation-for-conformer-based-streaming-asr-2305.13408</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modular-domain-adaptation-for-conformer-based-streaming-asr-2305.13408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modular-domain-adaptation-for-conformer-based-streaming-asr-2305.13408"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stream-extension-of-variational-bayesian-hmm-clustering-ms-vbx-for-combined-end-to-end-and-vector-clustering-based-diarization-2305.13580</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stream-extension-of-variational-bayesian-hmm-clustering-ms-vbx-for-combined-end-to-end-and-vector-clustering-based-diarization-2305.13580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stream-extension-of-variational-bayesian-hmm-clustering-ms-vbx-for-combined-end-to-end-and-vector-clustering-based-diarization-2305.13580"/></url>
<url><loc>https://scifaro.com/en/abs/tranusr-phoneme-to-word-transcoder-based-unified-speech-representation-learning-for-cross-lingual-speech-recognition-2305.13629</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tranusr-phoneme-to-word-transcoder-based-unified-speech-representation-learning-for-cross-lingual-speech-recognition-2305.13629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tranusr-phoneme-to-word-transcoder-based-unified-speech-representation-learning-for-cross-lingual-speech-recognition-2305.13629"/></url>
<url><loc>https://scifaro.com/en/abs/mp-senet-a-speech-enhancement-model-with-parallel-denoising-of-magnitude-and-phase-spectra-2305.13686</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mp-senet-a-speech-enhancement-model-with-parallel-denoising-of-magnitude-and-phase-spectra-2305.13686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mp-senet-a-speech-enhancement-model-with-parallel-denoising-of-magnitude-and-phase-spectra-2305.13686"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-level-knowledge-distillation-for-class-incremental-end-to-end-spoken-language-understanding-2305.13899</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-level-knowledge-distillation-for-class-incremental-end-to-end-spoken-language-understanding-2305.13899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-level-knowledge-distillation-for-class-incremental-end-to-end-spoken-language-understanding-2305.13899"/></url>
<url><loc>https://scifaro.com/en/abs/efficientspeech-an-on-device-text-to-speech-model-2305.13905</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficientspeech-an-on-device-text-to-speech-model-2305.13905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficientspeech-an-on-device-text-to-speech-model-2305.13905"/></url>
<url><loc>https://scifaro.com/en/abs/eeg2vec-self-supervised-electroencephalographic-representation-learning-2305.13957</loc><lastmod>2023-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg2vec-self-supervised-electroencephalographic-representation-learning-2305.13957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg2vec-self-supervised-electroencephalographic-representation-learning-2305.13957"/></url>
<url><loc>https://scifaro.com/en/abs/patch-mix-contrastive-learning-with-audio-spectrogram-transformer-on-respiratory-sound-classification-2305.14032</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/patch-mix-contrastive-learning-with-audio-spectrogram-transformer-on-respiratory-sound-classification-2305.14032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/patch-mix-contrastive-learning-with-audio-spectrogram-transformer-on-respiratory-sound-classification-2305.14032"/></url>
<url><loc>https://scifaro.com/en/abs/masked-modeling-duo-for-speech-specializing-general-purpose-audio-representation-to-speech-using-denoising-distillation-2305.14079</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-modeling-duo-for-speech-specializing-general-purpose-audio-representation-to-speech-using-denoising-distillation-2305.14079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-modeling-duo-for-speech-specializing-general-purpose-audio-representation-to-speech-using-denoising-distillation-2305.14079"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-spoken-language-development-of-children-with-asd-using-pre-trained-speech-embeddings-2305.14117</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-spoken-language-development-of-children-with-asd-using-pre-trained-speech-embeddings-2305.14117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-spoken-language-development-of-children-with-asd-using-pre-trained-speech-embeddings-2305.14117"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-gap-in-visual-speech-recognition-between-normal-and-silent-speech-based-on-metric-learning-2305.14203</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-gap-in-visual-speech-recognition-between-normal-and-silent-speech-based-on-metric-learning-2305.14203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-gap-in-visual-speech-recognition-between-normal-and-silent-speech-based-on-metric-learning-2305.14203"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-transferability-of-whisper-based-representations-for-in-the-wild-cross-task-downstream-speech-applications-2305.14546</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-transferability-of-whisper-based-representations-for-in-the-wild-cross-task-downstream-speech-applications-2305.14546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-transferability-of-whisper-based-representations-for-in-the-wild-cross-task-downstream-speech-applications-2305.14546"/></url>
<url><loc>https://scifaro.com/en/abs/downstream-task-agnostic-speech-enhancement-with-self-supervised-representation-loss-2305.14723</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/downstream-task-agnostic-speech-enhancement-with-self-supervised-representation-loss-2305.14723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/downstream-task-agnostic-speech-enhancement-with-self-supervised-representation-loss-2305.14723"/></url>
<url><loc>https://scifaro.com/en/abs/p-vectors-a-parallel-coupled-tdnn-transformer-network-for-speaker-verification-2305.14778</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/p-vectors-a-parallel-coupled-tdnn-transformer-network-for-speaker-verification-2305.14778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/p-vectors-a-parallel-coupled-tdnn-transformer-network-for-speaker-verification-2305.14778"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-through-knowledge-distillation-2305.14933</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-through-knowledge-distillation-2305.14933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-through-knowledge-distillation-2305.14933"/></url>
<url><loc>https://scifaro.com/en/abs/music-representing-corpus-virtual-an-open-sourced-library-for-explorative-music-generation-sound-design-and-instrument-creation-with-artificial-intelligence-and-machine-learning-2305.14948</loc><lastmod>2023-05-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-representing-corpus-virtual-an-open-sourced-library-for-explorative-music-generation-sound-design-and-instrument-creation-with-artificial-intelligence-and-machine-learning-2305.14948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-representing-corpus-virtual-an-open-sourced-library-for-explorative-music-generation-sound-design-and-instrument-creation-with-artificial-intelligence-and-machine-learning-2305.14948"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-audio-inpainting-2305.15266</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-audio-inpainting-2305.15266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-audio-inpainting-2305.15266"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-attacker-also-benefits-from-self-supervised-pretrained-model-2305.15518</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-attacker-also-benefits-from-self-supervised-pretrained-model-2305.15518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-attacker-also-benefits-from-self-supervised-pretrained-model-2305.15518"/></url>
<url><loc>https://scifaro.com/en/abs/rand-robustness-aware-norm-decay-for-quantized-seq2seq-models-2305.15536</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rand-robustness-aware-norm-decay-for-quantized-seq2seq-models-2305.15536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rand-robustness-aware-norm-decay-for-quantized-seq2seq-models-2305.15536"/></url>
<url><loc>https://scifaro.com/en/abs/generalized-domain-adaptation-framework-for-parametric-back-end-in-speaker-recognition-2305.15567</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalized-domain-adaptation-framework-for-parametric-back-end-in-speaker-recognition-2305.15567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalized-domain-adaptation-framework-for-parametric-back-end-in-speaker-recognition-2305.15567"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-text-to-speech-synthesis-for-turkic-languages-using-transliteration-2305.15749</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-text-to-speech-synthesis-for-turkic-languages-using-transliteration-2305.15749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-text-to-speech-synthesis-for-turkic-languages-using-transliteration-2305.15749"/></url>
<url><loc>https://scifaro.com/en/abs/dddm-vc-decoupled-denoising-diffusion-models-with-disentangled-representation-and-prior-mixup-for-verified-robust-voice-conversion-2305.15816</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dddm-vc-decoupled-denoising-diffusion-models-with-disentangled-representation-and-prior-mixup-for-verified-robust-voice-conversion-2305.15816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dddm-vc-decoupled-denoising-diffusion-models-with-disentangled-representation-and-prior-mixup-for-verified-robust-voice-conversion-2305.15816"/></url>
<url><loc>https://scifaro.com/en/abs/improving-scheduled-sampling-for-neural-transducer-based-asr-2305.15958</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-scheduled-sampling-for-neural-transducer-based-asr-2305.15958"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-scheduled-sampling-for-neural-transducer-based-asr-2305.15958"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducer-based-target-speaker-asr-exploiting-parallel-mixture-single-talker-speech-data-2305.15971</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducer-based-target-speaker-asr-exploiting-parallel-mixture-single-talker-speech-data-2305.15971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-neural-transducer-based-target-speaker-asr-exploiting-parallel-mixture-single-talker-speech-data-2305.15971"/></url>
<url><loc>https://scifaro.com/en/abs/the-power-of-prosody-and-prosody-of-power-an-acoustic-analysis-of-finnish-parliamentary-speech-2305.16040</loc><lastmod>2023-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-power-of-prosody-and-prosody-of-power-an-acoustic-analysis-of-finnish-parliamentary-speech-2305.16040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-power-of-prosody-and-prosody-of-power-an-acoustic-analysis-of-finnish-parliamentary-speech-2305.16040"/></url>
<url><loc>https://scifaro.com/en/abs/asr-and-emotional-speech-a-word-level-investigation-of-the-mutual-impact-of-speech-and-emotion-recognition-2305.16065</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asr-and-emotional-speech-a-word-level-investigation-of-the-mutual-impact-of-speech-and-emotion-recognition-2305.16065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asr-and-emotional-speech-a-word-level-investigation-of-the-mutual-impact-of-speech-and-emotion-recognition-2305.16065"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-for-personality-perception-via-speech-emotion-recognition-2305.16076</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-for-personality-perception-via-speech-emotion-recognition-2305.16076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-for-personality-perception-via-speech-emotion-recognition-2305.16076"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-features-for-mispronunciation-detection-of-r-in-child-speech-sound-disorders-2305.16085</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-features-for-mispronunciation-detection-of-r-in-child-speech-sound-disorders-2305.16085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-speech-inversion-features-for-mispronunciation-detection-of-r-in-child-speech-sound-disorders-2305.16085"/></url>
<url><loc>https://scifaro.com/en/abs/classifying-rhoticity-of-r-in-speech-sound-disorder-using-age-and-sex-normalized-formants-2305.16111</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classifying-rhoticity-of-r-in-speech-sound-disorder-using-age-and-sex-normalized-formants-2305.16111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classifying-rhoticity-of-r-in-speech-sound-disorder-using-age-and-sex-normalized-formants-2305.16111"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-speech-pre-training-a-case-study-on-target-speech-recognition-2305.16286</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-speech-pre-training-a-case-study-on-target-speech-recognition-2305.16286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-speech-pre-training-a-case-study-on-target-speech-recognition-2305.16286"/></url>
<url><loc>https://scifaro.com/en/abs/audiodec-an-open-source-streaming-high-fidelity-neural-audio-codec-2305.16608</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiodec-an-open-source-streaming-high-fidelity-neural-audio-codec-2305.16608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiodec-an-open-source-streaming-high-fidelity-neural-audio-codec-2305.16608"/></url>
<url><loc>https://scifaro.com/en/abs/2-bit-conformer-quantization-for-automatic-speech-recognition-2305.16619</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/2-bit-conformer-quantization-for-automatic-speech-recognition-2305.16619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/2-bit-conformer-quantization-for-automatic-speech-recognition-2305.16619"/></url>
<url><loc>https://scifaro.com/en/abs/abc-kd-attention-based-compression-knowledge-distillation-for-deep-learning-based-noise-suppression-2305.16665</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/abc-kd-attention-based-compression-knowledge-distillation-for-deep-learning-based-noise-suppression-2305.16665"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/abc-kd-attention-based-compression-knowledge-distillation-for-deep-learning-based-noise-suppression-2305.16665"/></url>
<url><loc>https://scifaro.com/en/abs/learning-representation-of-therapist-empathy-in-counseling-conversation-using-siamese-hierarchical-attention-network-2305.16690</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-representation-of-therapist-empathy-in-counseling-conversation-using-siamese-hierarchical-attention-network-2305.16690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-representation-of-therapist-empathy-in-counseling-conversation-using-siamese-hierarchical-attention-network-2305.16690"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-tuning-of-loss-trade-offs-without-hyper-parameter-search-in-end-to-end-zero-shot-speech-synthesis-2305.16699</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-tuning-of-loss-trade-offs-without-hyper-parameter-search-in-end-to-end-zero-shot-speech-synthesis-2305.16699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-tuning-of-loss-trade-offs-without-hyper-parameter-search-in-end-to-end-zero-shot-speech-synthesis-2305.16699"/></url>
<url><loc>https://scifaro.com/en/abs/electrodenet-a-deep-learning-based-sound-coding-strategy-for-cochlear-implants-2305.16753</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/electrodenet-a-deep-learning-based-sound-coding-strategy-for-cochlear-implants-2305.16753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/electrodenet-a-deep-learning-based-sound-coding-strategy-for-cochlear-implants-2305.16753"/></url>
<url><loc>https://scifaro.com/en/abs/neural-modeling-of-magnetic-tape-recorders-2305.16862</loc><lastmod>2023-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-modeling-of-magnetic-tape-recorders-2305.16862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-modeling-of-magnetic-tape-recorders-2305.16862"/></url>
<url><loc>https://scifaro.com/en/abs/one-step-knowledge-distillation-and-fine-tuning-in-using-large-pre-trained-self-supervised-learning-models-for-speaker-verification-2305.17394</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-step-knowledge-distillation-and-fine-tuning-in-using-large-pre-trained-self-supervised-learning-models-for-speaker-verification-2305.17394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-step-knowledge-distillation-and-fine-tuning-in-using-large-pre-trained-self-supervised-learning-models-for-speaker-verification-2305.17394"/></url>
<url><loc>https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-post-glossectomy-speech-with-guided-diffusion-models-2305.17436</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-post-glossectomy-speech-with-guided-diffusion-models-2305.17436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-post-glossectomy-speech-with-guided-diffusion-models-2305.17436"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-language-audio-models-as-few-shot-audio-learners-2305.17719</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-language-audio-models-as-few-shot-audio-learners-2305.17719"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-language-audio-models-as-few-shot-audio-learners-2305.17719"/></url>
<url><loc>https://scifaro.com/en/abs/stochastic-pitch-prediction-improves-the-diversity-and-naturalness-of-speech-in-glow-tts-2305.17724</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stochastic-pitch-prediction-improves-the-diversity-and-naturalness-of-speech-in-glow-tts-2305.17724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stochastic-pitch-prediction-improves-the-diversity-and-naturalness-of-speech-in-glow-tts-2305.17724"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-evaluation-of-turn-taking-cues-in-conversational-speech-synthesis-2305.17971</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-evaluation-of-turn-taking-cues-in-conversational-speech-synthesis-2305.17971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-evaluation-of-turn-taking-cues-in-conversational-speech-synthesis-2305.17971"/></url>
<url><loc>https://scifaro.com/en/abs/an-experimental-review-of-speaker-diarization-methods-with-application-to-two-speaker-conversational-telephone-speech-recordings-2305.18074</loc><lastmod>2023-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-experimental-review-of-speaker-diarization-methods-with-application-to-two-speaker-conversational-telephone-speech-recordings-2305.18074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-experimental-review-of-speaker-diarization-methods-with-application-to-two-speaker-conversational-telephone-speech-recordings-2305.18074"/></url>
<url><loc>https://scifaro.com/en/abs/a-hierarchical-context-aware-modeling-approach-for-multi-aspect-and-multi-granular-pronunciation-assessment-2305.18146</loc><lastmod>2023-06-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hierarchical-context-aware-modeling-approach-for-multi-aspect-and-multi-granular-pronunciation-assessment-2305.18146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hierarchical-context-aware-modeling-approach-for-multi-aspect-and-multi-granular-pronunciation-assessment-2305.18146"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-design-of-a-micro-perforated-panel-absorber-with-8-6-octave-bands-2305.18298</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-design-of-a-micro-perforated-panel-absorber-with-8-6-octave-bands-2305.18298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-design-of-a-micro-perforated-panel-absorber-with-8-6-octave-bands-2305.18298"/></url>
<url><loc>https://scifaro.com/en/abs/decor-defy-knowledge-forgetting-by-predicting-earlier-audio-codes-2305.18441</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decor-defy-knowledge-forgetting-by-predicting-earlier-audio-codes-2305.18441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decor-defy-knowledge-forgetting-by-predicting-earlier-audio-codes-2305.18441"/></url>
<url><loc>https://scifaro.com/en/abs/transforming-the-embeddings-a-lightweight-technique-for-speech-emotion-recognition-tasks-2305.18640</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transforming-the-embeddings-a-lightweight-technique-for-speech-emotion-recognition-tasks-2305.18640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transforming-the-embeddings-a-lightweight-technique-for-speech-emotion-recognition-tasks-2305.18640"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-speech-restoration-guided-by-self-supervised-speech-representation-2305.18739</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-speech-restoration-guided-by-self-supervised-speech-representation-2305.18739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-speech-restoration-guided-by-self-supervised-speech-representation-2305.18739"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-multi-lingual-asr-models-for-handling-multiple-talkers-2305.18747</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-multi-lingual-asr-models-for-handling-multiple-talkers-2305.18747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-multi-lingual-asr-models-for-handling-multiple-talkers-2305.18747"/></url>
<url><loc>https://scifaro.com/en/abs/dual-transformer-decoder-based-features-fusion-network-for-automated-audio-captioning-2305.18753</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-transformer-decoder-based-features-fusion-network-for-automated-audio-captioning-2305.18753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-transformer-decoder-based-features-fusion-network-for-automated-audio-captioning-2305.18753"/></url>
<url><loc>https://scifaro.com/en/abs/libritts-r-a-restored-multi-speaker-text-to-speech-corpus-2305.18802</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libritts-r-a-restored-multi-speaker-text-to-speech-corpus-2305.18802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libritts-r-a-restored-multi-speaker-text-to-speech-corpus-2305.18802"/></url>
<url><loc>https://scifaro.com/en/abs/merlion-ccs-challenge-a-english-mandarin-code-switching-child-directed-speech-corpus-for-language-identification-and-diarization-2305.18881</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/merlion-ccs-challenge-a-english-mandarin-code-switching-child-directed-speech-corpus-for-language-identification-and-diarization-2305.18881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/merlion-ccs-challenge-a-english-mandarin-code-switching-child-directed-speech-corpus-for-language-identification-and-diarization-2305.18881"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-model-performance-in-language-identification-beyond-simple-error-statistics-2305.18925</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-model-performance-in-language-identification-beyond-simple-error-statistics-2305.18925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-model-performance-in-language-identification-beyond-simple-error-statistics-2305.18925"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-with-just-nearest-neighbors-2305.18975</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-with-just-nearest-neighbors-2305.18975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-with-just-nearest-neighbors-2305.18975"/></url>
<url><loc>https://scifaro.com/en/abs/minisuperb-lightweight-benchmark-for-self-supervised-speech-models-2305.19011</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minisuperb-lightweight-benchmark-for-self-supervised-speech-models-2305.19011"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minisuperb-lightweight-benchmark-for-self-supervised-speech-models-2305.19011"/></url>
<url><loc>https://scifaro.com/en/abs/towards-single-integrated-spoofing-aware-speaker-verification-embeddings-2305.19051</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-single-integrated-spoofing-aware-speaker-verification-embeddings-2305.19051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-single-integrated-spoofing-aware-speaker-verification-embeddings-2305.19051"/></url>
<url><loc>https://scifaro.com/en/abs/prospective-validation-of-motor-based-intervention-with-automated-mispronunciation-detection-of-rhotics-in-residual-speech-sound-disorders-2305.19090</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prospective-validation-of-motor-based-intervention-with-automated-mispronunciation-detection-of-rhotics-in-residual-speech-sound-disorders-2305.19090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prospective-validation-of-motor-based-intervention-with-automated-mispronunciation-detection-of-rhotics-in-residual-speech-sound-disorders-2305.19090"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-preferred-dialogue-to-background-loudness-difference-in-dialogue-separated-audio-2305.19100</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-preferred-dialogue-to-background-loudness-difference-in-dialogue-separated-audio-2305.19100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-preferred-dialogue-to-background-loudness-difference-in-dialogue-separated-audio-2305.19100"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-semantic-information-for-efficient-self-supervised-emotion-recognition-with-audio-textual-distilled-models-2305.19184</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-semantic-information-for-efficient-self-supervised-emotion-recognition-with-audio-textual-distilled-models-2305.19184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-semantic-information-for-efficient-self-supervised-emotion-recognition-with-audio-textual-distilled-models-2305.19184"/></url>
<url><loc>https://scifaro.com/en/abs/a-stutter-seldom-comes-alone-cross-corpus-stuttering-detection-as-a-multi-label-problem-2305.19255</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-stutter-seldom-comes-alone-cross-corpus-stuttering-detection-as-a-multi-label-problem-2305.19255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-stutter-seldom-comes-alone-cross-corpus-stuttering-detection-as-a-multi-label-problem-2305.19255"/></url>
<url><loc>https://scifaro.com/en/abs/make-a-voice-unified-voice-synthesis-with-discrete-representation-2305.19269</loc><lastmod>2023-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/make-a-voice-unified-voice-synthesis-with-discrete-representation-2305.19269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/make-a-voice-unified-voice-synthesis-with-discrete-representation-2305.19269"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-fine-tuning-strategies-for-automatic-mos-prediction-in-text-to-speech-for-low-resource-languages-2305.19396</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-fine-tuning-strategies-for-automatic-mos-prediction-in-text-to-speech-for-low-resource-languages-2305.19396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-fine-tuning-strategies-for-automatic-mos-prediction-in-text-to-speech-for-low-resource-languages-2305.19396"/></url>
<url><loc>https://scifaro.com/en/abs/merlion-ccs-challenge-evaluation-plan-2305.19493</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/merlion-ccs-challenge-evaluation-plan-2305.19493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/merlion-ccs-challenge-evaluation-plan-2305.19493"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-dynamically-expanded-classifier-with-self-attention-modified-prototypes-2305.19539</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-dynamically-expanded-classifier-with-self-attention-modified-prototypes-2305.19539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-dynamically-expanded-classifier-with-self-attention-modified-prototypes-2305.19539"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-speaker-identification-using-lightweight-prototypical-network-with-feature-grouping-and-interaction-2305.19541</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-speaker-identification-using-lightweight-prototypical-network-with-feature-grouping-and-interaction-2305.19541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-speaker-identification-using-lightweight-prototypical-network-with-feature-grouping-and-interaction-2305.19541"/></url>
<url><loc>https://scifaro.com/en/abs/fn-ssl-full-band-and-narrow-band-fusion-for-sound-source-localization-2305.19610</loc><lastmod>2023-06-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fn-ssl-full-band-and-narrow-band-fusion-for-sound-source-localization-2305.19610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fn-ssl-full-band-and-narrow-band-fusion-for-sound-source-localization-2305.19610"/></url>
<url><loc>https://scifaro.com/en/abs/vilas-exploring-the-effects-of-vision-and-language-context-in-automatic-speech-recognition-2305.19972</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vilas-exploring-the-effects-of-vision-and-language-context-in-automatic-speech-recognition-2305.19972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vilas-exploring-the-effects-of-vision-and-language-context-in-automatic-speech-recognition-2305.19972"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-separation-in-noisy-environments-with-a-lightweight-iterative-model-2306.00160</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-separation-in-noisy-environments-with-a-lightweight-iterative-model-2306.00160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-separation-in-noisy-environments-with-a-lightweight-iterative-model-2306.00160"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-speech-inversion-for-estimation-of-nasalance-2306.00203</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-speech-inversion-for-estimation-of-nasalance-2306.00203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-speech-inversion-for-estimation-of-nasalance-2306.00203"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-dimensional-deep-structured-state-space-approach-to-speech-enhancement-using-small-footprint-models-2306.00331</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-dimensional-deep-structured-state-space-approach-to-speech-enhancement-using-small-footprint-models-2306.00331"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-dimensional-deep-structured-state-space-approach-to-speech-enhancement-using-small-footprint-models-2306.00331"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-using-attentive-multi-scale-convolutional-recurrent-network-2306.00426</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-using-attentive-multi-scale-convolutional-recurrent-network-2306.00426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-using-attentive-multi-scale-convolutional-recurrent-network-2306.00426"/></url>
<url><loc>https://scifaro.com/en/abs/speech-self-supervised-representation-benchmarking-are-we-doing-it-right-2306.00452</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-self-supervised-representation-benchmarking-are-we-doing-it-right-2306.00452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-self-supervised-representation-benchmarking-are-we-doing-it-right-2306.00452"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-data-augmentation-for-domain-adapted-fine-tuning-of-self-supervised-speech-representations-2306.00481</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-data-augmentation-for-domain-adapted-fine-tuning-of-self-supervised-speech-representations-2306.00481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-data-augmentation-for-domain-adapted-fine-tuning-of-self-supervised-speech-representations-2306.00481"/></url>
<url><loc>https://scifaro.com/en/abs/frame-wise-and-overlap-robust-speaker-embeddings-for-meeting-diarization-2306.00625</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-wise-and-overlap-robust-speaker-embeddings-for-meeting-diarization-2306.00625"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-wise-and-overlap-robust-speaker-embeddings-for-meeting-diarization-2306.00625"/></url>
<url><loc>https://scifaro.com/en/abs/a-teacher-student-approach-for-extracting-informative-speaker-embeddings-from-speech-mixtures-2306.00634</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-teacher-student-approach-for-extracting-informative-speaker-embeddings-from-speech-mixtures-2306.00634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-teacher-student-approach-for-extracting-informative-speaker-embeddings-from-speech-mixtures-2306.00634"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-language-identification-system-for-english-mandarin-code-switching-child-directed-speech-2306.00736</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-language-identification-system-for-english-mandarin-code-switching-child-directed-speech-2306.00736"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-language-identification-system-for-english-mandarin-code-switching-child-directed-speech-2306.00736"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-enhancement-using-learnable-comb-filter-for-light-weight-full-band-speech-enhancement-model-2306.00812</loc><lastmod>2023-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-enhancement-using-learnable-comb-filter-for-light-weight-full-band-speech-enhancement-model-2306.00812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-enhancement-using-learnable-comb-filter-for-light-weight-full-band-speech-enhancement-model-2306.00812"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-framework-for-end-to-end-imposter-identification-in-unseen-speaker-recognition-2306.00952</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-framework-for-end-to-end-imposter-identification-in-unseen-speaker-recognition-2306.00952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-framework-for-end-to-end-imposter-identification-in-unseen-speaker-recognition-2306.00952"/></url>
<url><loc>https://scifaro.com/en/abs/weakly-supervised-forced-alignment-of-disfluent-speech-using-phoneme-level-modeling-2306.00996</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/weakly-supervised-forced-alignment-of-disfluent-speech-using-phoneme-level-modeling-2306.00996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/weakly-supervised-forced-alignment-of-disfluent-speech-using-phoneme-level-modeling-2306.00996"/></url>
<url><loc>https://scifaro.com/en/abs/towards-selection-of-text-to-speech-data-to-augment-asr-training-2306.00998</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-selection-of-text-to-speech-data-to-augment-asr-training-2306.00998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-selection-of-text-to-speech-data-to-augment-asr-training-2306.00998"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-ship-radiated-noise-recognition-with-learnable-fine-grained-wavelet-transform-2306.01002</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-ship-radiated-noise-recognition-with-learnable-fine-grained-wavelet-transform-2306.01002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-ship-radiated-noise-recognition-with-learnable-fine-grained-wavelet-transform-2306.01002"/></url>
<url><loc>https://scifaro.com/en/abs/alo-vc-any-to-any-low-latency-one-shot-voice-conversion-2306.01100</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alo-vc-any-to-any-low-latency-one-shot-voice-conversion-2306.01100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alo-vc-any-to-any-low-latency-one-shot-voice-conversion-2306.01100"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-an-unadaptable-asr-system-2306.01208</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-an-unadaptable-asr-system-2306.01208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-an-unadaptable-asr-system-2306.01208"/></url>
<url><loc>https://scifaro.com/en/abs/tensor-decomposition-for-minimization-of-e2e-slu-model-toward-on-device-processing-2306.01247</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tensor-decomposition-for-minimization-of-e2e-slu-model-toward-on-device-processing-2306.01247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tensor-decomposition-for-minimization-of-e2e-slu-model-toward-on-device-processing-2306.01247"/></url>
<url><loc>https://scifaro.com/en/abs/improved-training-for-end-to-end-streaming-automatic-speech-recognition-model-with-punctuation-2306.01296</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-training-for-end-to-end-streaming-automatic-speech-recognition-model-with-punctuation-2306.01296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-training-for-end-to-end-streaming-automatic-speech-recognition-model-with-punctuation-2306.01296"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-grey-box-modelling-of-phaser-effects-using-frame-based-spectral-processing-2306.01332</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-grey-box-modelling-of-phaser-effects-using-frame-based-spectral-processing-2306.01332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-grey-box-modelling-of-phaser-effects-using-frame-based-spectral-processing-2306.01332"/></url>
<url><loc>https://scifaro.com/en/abs/task-agnostic-structured-pruning-of-speech-representation-models-2306.01385</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-agnostic-structured-pruning-of-speech-representation-models-2306.01385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-agnostic-structured-pruning-of-speech-representation-models-2306.01385"/></url>
<url><loc>https://scifaro.com/en/abs/hd-demucs-general-speech-restoration-with-heterogeneous-decoders-2306.01411</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hd-demucs-general-speech-restoration-with-heterogeneous-decoders-2306.01411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hd-demucs-general-speech-restoration-with-heterogeneous-decoders-2306.01411"/></url>
<url><loc>https://scifaro.com/en/abs/active-noise-control-in-the-new-century-the-role-and-prospect-of-signal-processing-2306.01425</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-noise-control-in-the-new-century-the-role-and-prospect-of-signal-processing-2306.01425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-noise-control-in-the-new-century-the-role-and-prospect-of-signal-processing-2306.01425"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-score-based-generative-models-2306.01432</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-score-based-generative-models-2306.01432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-score-based-generative-models-2306.01432"/></url>
<url><loc>https://scifaro.com/en/abs/blind-audio-bandwidth-extension-a-diffusion-based-zero-shot-approach-2306.01433</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-audio-bandwidth-extension-a-diffusion-based-zero-shot-approach-2306.01433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-audio-bandwidth-extension-a-diffusion-based-zero-shot-approach-2306.01433"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-representation-effective-for-estimating-vocal-tract-information-2306.01522</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-representation-effective-for-estimating-vocal-tract-information-2306.01522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-representation-effective-for-estimating-vocal-tract-information-2306.01522"/></url>
<url><loc>https://scifaro.com/en/abs/on-crowdsourcing-design-with-comparison-category-rating-for-evaluating-speech-enhancement-algorithms-2306.01538</loc><lastmod>2023-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-crowdsourcing-design-with-comparison-category-rating-for-evaluating-speech-enhancement-algorithms-2306.01538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-crowdsourcing-design-with-comparison-category-rating-for-evaluating-speech-enhancement-algorithms-2306.01538"/></url>
<url><loc>https://scifaro.com/en/abs/non-uniform-speaker-disentanglement-for-depression-detection-from-raw-speech-signals-2306.01861</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-uniform-speaker-disentanglement-for-depression-detection-from-raw-speech-signals-2306.01861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-uniform-speaker-disentanglement-for-depression-detection-from-raw-speech-signals-2306.01861"/></url>
<url><loc>https://scifaro.com/en/abs/in-the-wild-speech-emotion-conversion-using-disentangled-self-supervised-representations-and-neural-vocoder-based-resynthesis-2306.01916</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-the-wild-speech-emotion-conversion-using-disentangled-self-supervised-representations-and-neural-vocoder-based-resynthesis-2306.01916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-the-wild-speech-emotion-conversion-using-disentangled-self-supervised-representations-and-neural-vocoder-based-resynthesis-2306.01916"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-independent-neural-formant-synthesis-2306.01957</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-independent-neural-formant-synthesis-2306.01957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-independent-neural-formant-synthesis-2306.01957"/></url>
<url><loc>https://scifaro.com/en/abs/sgem-test-time-adaptation-for-automatic-speech-recognition-via-sequential-level-generalized-entropy-minimization-2306.01981</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sgem-test-time-adaptation-for-automatic-speech-recognition-via-sequential-level-generalized-entropy-minimization-2306.01981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sgem-test-time-adaptation-for-automatic-speech-recognition-via-sequential-level-generalized-entropy-minimization-2306.01981"/></url>
<url><loc>https://scifaro.com/en/abs/why-we-should-report-the-details-in-subjective-evaluation-of-tts-more-rigorously-2306.02044</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-we-should-report-the-details-in-subjective-evaluation-of-tts-more-rigorously-2306.02044"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-we-should-report-the-details-in-subjective-evaluation-of-tts-more-rigorously-2306.02044"/></url>
<url><loc>https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-stochastic-classifier-2306.02053</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-stochastic-classifier-2306.02053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/few-shot-class-incremental-audio-classification-using-stochastic-classifier-2306.02053"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-data-augmentation-and-lightweight-resnet-2306.02054</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-data-augmentation-and-lightweight-resnet-2306.02054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-data-augmentation-and-lightweight-resnet-2306.02054"/></url>
<url><loc>https://scifaro.com/en/abs/speechgen-unlocking-the-generative-power-of-speech-language-models-with-prompts-2306.02207</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechgen-unlocking-the-generative-power-of-speech-language-models-with-prompts-2306.02207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechgen-unlocking-the-generative-power-of-speech-language-models-with-prompts-2306.02207"/></url>
<url><loc>https://scifaro.com/en/abs/influence-of-lossy-speech-codecs-on-hearing-aid-binaural-sound-source-localisation-using-dnns-2306.02344</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/influence-of-lossy-speech-codecs-on-hearing-aid-binaural-sound-source-localisation-using-dnns-2306.02344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/influence-of-lossy-speech-codecs-on-hearing-aid-binaural-sound-source-localisation-using-dnns-2306.02344"/></url>
<url><loc>https://scifaro.com/en/abs/singnet-a-real-time-singing-voice-beat-and-downbeat-tracking-system-2306.02372</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singnet-a-real-time-singing-voice-beat-and-downbeat-tracking-system-2306.02372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singnet-a-real-time-singing-voice-beat-and-downbeat-tracking-system-2306.02372"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-linear-acoustic-echo-cancellation-2306.02450</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-linear-acoustic-echo-cancellation-2306.02450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-deep-learning-based-adaptation-control-for-linear-acoustic-echo-cancellation-2306.02450"/></url>
<url><loc>https://scifaro.com/en/abs/otf-optimal-transport-based-fusion-of-supervised-and-self-supervised-learning-models-for-automatic-speech-recognition-2306.02541</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/otf-optimal-transport-based-fusion-of-supervised-and-self-supervised-learning-models-for-automatic-speech-recognition-2306.02541"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/otf-optimal-transport-based-fusion-of-supervised-and-self-supervised-learning-models-for-automatic-speech-recognition-2306.02541"/></url>
<url><loc>https://scifaro.com/en/abs/divided-spectro-temporal-attention-for-sound-event-localization-and-detection-in-real-scenes-for-dcase2023-challenge-2306.02591</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/divided-spectro-temporal-attention-for-sound-event-localization-and-detection-in-real-scenes-for-dcase2023-challenge-2306.02591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/divided-spectro-temporal-attention-for-sound-event-localization-and-detection-in-real-scenes-for-dcase2023-challenge-2306.02591"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-interpretable-and-generalizable-re-synchronization-model-for-cued-speech-based-on-a-multi-cuer-corpus-2306.02596</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-interpretable-and-generalizable-re-synchronization-model-for-cued-speech-based-on-a-multi-cuer-corpus-2306.02596"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-interpretable-and-generalizable-re-synchronization-model-for-cued-speech-based-on-a-multi-cuer-corpus-2306.02596"/></url>
<url><loc>https://scifaro.com/en/abs/effcrn-an-efficient-convolutional-recurrent-network-for-high-performance-speech-enhancement-2306.02778</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effcrn-an-efficient-convolutional-recurrent-network-for-high-performance-speech-enhancement-2306.02778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effcrn-an-efficient-convolutional-recurrent-network-for-high-performance-speech-enhancement-2306.02778"/></url>
<url><loc>https://scifaro.com/en/abs/vocoder-drift-in-x-vector-based-speaker-anonymization-2306.02892</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocoder-drift-in-x-vector-based-speaker-anonymization-2306.02892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocoder-drift-in-x-vector-based-speaker-anonymization-2306.02892"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-or-sequential-training-how-speech-representations-cooperate-in-a-multi-task-self-supervised-learning-system-2306.02972</loc><lastmod>2024-03-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-or-sequential-training-how-speech-representations-cooperate-in-a-multi-task-self-supervised-learning-system-2306.02972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-or-sequential-training-how-speech-representations-cooperate-in-a-multi-task-self-supervised-learning-system-2306.02972"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-behavior-of-intrusive-and-non-intrusive-speech-enhancement-metrics-in-predictive-and-generative-settings-2306.03014</loc><lastmod>2023-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-behavior-of-intrusive-and-non-intrusive-speech-enhancement-metrics-in-predictive-and-generative-settings-2306.03014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-behavior-of-intrusive-and-non-intrusive-speech-enhancement-metrics-in-predictive-and-generative-settings-2306.03014"/></url>
<url><loc>https://scifaro.com/en/abs/lipvoicer-generating-speech-from-silent-videos-guided-by-lip-reading-2306.03258</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipvoicer-generating-speech-from-silent-videos-guided-by-lip-reading-2306.03258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipvoicer-generating-speech-from-silent-videos-guided-by-lip-reading-2306.03258"/></url>
<url><loc>https://scifaro.com/en/abs/a-generative-framework-for-conversational-laughter-its-language-model-and-laughter-sound-synthesis-2306.03465</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generative-framework-for-conversational-laughter-its-language-model-and-laughter-sound-synthesis-2306.03465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generative-framework-for-conversational-laughter-its-language-model-and-laughter-sound-synthesis-2306.03465"/></url>
<url><loc>https://scifaro.com/en/abs/mega-tts-zero-shot-text-to-speech-at-scale-with-intrinsic-inductive-bias-2306.03509</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mega-tts-zero-shot-text-to-speech-at-scale-with-intrinsic-inductive-bias-2306.03509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mega-tts-zero-shot-text-to-speech-at-scale-with-intrinsic-inductive-bias-2306.03509"/></url>
<url><loc>https://scifaro.com/en/abs/experimenting-with-additive-margins-for-contrastive-self-supervised-speaker-verification-2306.03664</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experimenting-with-additive-margins-for-contrastive-self-supervised-speaker-verification-2306.03664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experimenting-with-additive-margins-for-contrastive-self-supervised-speaker-verification-2306.03664"/></url>
<url><loc>https://scifaro.com/en/abs/some-voices-are-too-common-building-fair-speech-recognition-systems-using-the-common-voice-dataset-2306.03773</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/some-voices-are-too-common-building-fair-speech-recognition-systems-using-the-common-voice-dataset-2306.03773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/some-voices-are-too-common-building-fair-speech-recognition-systems-using-the-common-voice-dataset-2306.03773"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-speech-to-confusion-network-speech-recognition-2306.03778</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-speech-to-confusion-network-speech-recognition-2306.03778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-speech-to-confusion-network-speech-recognition-2306.03778"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-robustness-of-arabic-speech-dialect-identification-2306.03789</loc><lastmod>2023-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-robustness-of-arabic-speech-dialect-identification-2306.03789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-robustness-of-arabic-speech-dialect-identification-2306.03789"/></url>
<url><loc>https://scifaro.com/en/abs/rescuespeech-a-german-corpus-for-speech-recognition-in-search-and-rescue-domain-2306.04054</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rescuespeech-a-german-corpus-for-speech-recognition-in-search-and-rescue-domain-2306.04054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rescuespeech-a-german-corpus-for-speech-recognition-in-search-and-rescue-domain-2306.04054"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-audio-teacher-student-transformer-for-both-clip-level-and-frame-level-tasks-2306.04186</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-audio-teacher-student-transformer-for-both-clip-level-and-frame-level-tasks-2306.04186"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-audio-teacher-student-transformer-for-both-clip-level-and-frame-level-tasks-2306.04186"/></url>
<url><loc>https://scifaro.com/en/abs/convolutional-recurrent-neural-network-with-attention-for-3d-speech-enhancement-2306.04987</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-network-with-attention-for-3d-speech-enhancement-2306.04987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/convolutional-recurrent-neural-network-with-attention-for-3d-speech-enhancement-2306.04987"/></url>
<url><loc>https://scifaro.com/en/abs/vifs-an-end-to-end-variational-inference-for-foley-sound-synthesis-2306.05004</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vifs-an-end-to-end-variational-inference-for-foley-sound-synthesis-2306.05004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vifs-an-end-to-end-variational-inference-for-foley-sound-synthesis-2306.05004"/></url>
<url><loc>https://scifaro.com/en/abs/matching-latent-encoding-for-audio-text-based-keyword-spotting-2306.05245</loc><lastmod>2023-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matching-latent-encoding-for-audio-text-based-keyword-spotting-2306.05245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matching-latent-encoding-for-audio-text-based-keyword-spotting-2306.05245"/></url>
<url><loc>https://scifaro.com/en/abs/latent-phrase-matching-for-dysarthric-speech-2306.05446</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-phrase-matching-for-dysarthric-speech-2306.05446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-phrase-matching-for-dysarthric-speech-2306.05446"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-scene-clustering-using-joint-optimization-of-deep-embedding-learning-and-clustering-iteration-2306.05621</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-scene-clustering-using-joint-optimization-of-deep-embedding-learning-and-clustering-iteration-2306.05621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-scene-clustering-using-joint-optimization-of-deep-embedding-learning-and-clustering-iteration-2306.05621"/></url>
<url><loc>https://scifaro.com/en/abs/domestic-activities-classification-from-audio-recordings-using-multi-scale-dilated-depthwise-separable-convolutional-network-2306.05624</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domestic-activities-classification-from-audio-recordings-using-multi-scale-dilated-depthwise-separable-convolutional-network-2306.05624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domestic-activities-classification-from-audio-recordings-using-multi-scale-dilated-depthwise-separable-convolutional-network-2306.05624"/></url>
<url><loc>https://scifaro.com/en/abs/learning-emotional-representations-from-imbalanced-speech-data-for-speech-emotion-recognition-and-emotional-text-to-speech-2306.05709</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-emotional-representations-from-imbalanced-speech-data-for-speech-emotion-recognition-and-emotional-text-to-speech-2306.05709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-emotional-representations-from-imbalanced-speech-data-for-speech-emotion-recognition-and-emotional-text-to-speech-2306.05709"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-upsampling-with-a-generative-adversarial-network-using-a-gnomonic-equiangular-projection-2306.05812</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-upsampling-with-a-generative-adversarial-network-using-a-gnomonic-equiangular-projection-2306.05812"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-upsampling-with-a-generative-adversarial-network-using-a-gnomonic-equiangular-projection-2306.05812"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-encoder-decoder-and-dual-path-conformer-for-comprehensive-feature-learning-in-speech-enhancement-2306.05861</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-encoder-decoder-and-dual-path-conformer-for-comprehensive-feature-learning-in-speech-enhancement-2306.05861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-encoder-decoder-and-dual-path-conformer-for-comprehensive-feature-learning-in-speech-enhancement-2306.05861"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-speech-separation-network-based-on-recurrent-fusion-dilated-convolution-and-channel-attention-2306.05887</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-speech-separation-network-based-on-recurrent-fusion-dilated-convolution-and-channel-attention-2306.05887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-speech-separation-network-based-on-recurrent-fusion-dilated-convolution-and-channel-attention-2306.05887"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embeddings-as-individuality-proxy-for-voice-stress-detection-2306.05915</loc><lastmod>2023-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embeddings-as-individuality-proxy-for-voice-stress-detection-2306.05915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embeddings-as-individuality-proxy-for-voice-stress-detection-2306.05915"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervsied-learning-based-sound-event-detection-using-freuqency-dynamic-convolution-with-large-kernel-attention-for-dcase-challenge-2023-task-4-2306.06461</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervsied-learning-based-sound-event-detection-using-freuqency-dynamic-convolution-with-large-kernel-attention-for-dcase-challenge-2023-task-4-2306.06461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervsied-learning-based-sound-event-detection-using-freuqency-dynamic-convolution-with-large-kernel-attention-for-dcase-challenge-2023-task-4-2306.06461"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-selective-off-screen-speech-extraction-2306.06495</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-selective-off-screen-speech-extraction-2306.06495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-with-selective-off-screen-speech-extraction-2306.06495"/></url>
<url><loc>https://scifaro.com/en/abs/what-can-an-accent-identifier-learn-probing-phonetic-and-prosodic-information-in-a-wav2vec2-based-accent-identification-model-2306.06524</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-can-an-accent-identifier-learn-probing-phonetic-and-prosodic-information-in-a-wav2vec2-based-accent-identification-model-2306.06524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-can-an-accent-identifier-learn-probing-phonetic-and-prosodic-information-in-a-wav2vec2-based-accent-identification-model-2306.06524"/></url>
<url><loc>https://scifaro.com/en/abs/hiddensinger-high-quality-singing-voice-synthesis-via-neural-audio-codec-and-latent-diffusion-models-2306.06814</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hiddensinger-high-quality-singing-voice-synthesis-via-neural-audio-codec-and-latent-diffusion-models-2306.06814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hiddensinger-high-quality-singing-voice-synthesis-via-neural-audio-codec-and-latent-diffusion-models-2306.06814"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-frequency-attention-alternative-to-cnn-frontends-for-automatic-speech-recognition-2306.06954</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-frequency-attention-alternative-to-cnn-frontends-for-automatic-speech-recognition-2306.06954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-frequency-attention-alternative-to-cnn-frontends-for-automatic-speech-recognition-2306.06954"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-dysarthric-speech-recognition-using-adapter-fusion-and-householder-transformation-2306.07090</loc><lastmod>2023-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-dysarthric-speech-recognition-using-adapter-fusion-and-householder-transformation-2306.07090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-dysarthric-speech-recognition-using-adapter-fusion-and-householder-transformation-2306.07090"/></url>
<url><loc>https://scifaro.com/en/abs/pausespeech-natural-speech-synthesis-via-pre-trained-language-model-and-pause-based-prosody-modeling-2306.07489</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pausespeech-natural-speech-synthesis-via-pre-trained-language-model-and-pause-based-prosody-modeling-2306.07489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pausespeech-natural-speech-synthesis-via-pre-trained-language-model-and-pause-based-prosody-modeling-2306.07489"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-verification-across-ages-investigating-deep-speaker-embedding-sensitivity-to-age-mismatch-in-enrollment-and-test-speech-2306.07501</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-verification-across-ages-investigating-deep-speaker-embedding-sensitivity-to-age-mismatch-in-enrollment-and-test-speech-2306.07501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-verification-across-ages-investigating-deep-speaker-embedding-sensitivity-to-age-mismatch-in-enrollment-and-test-speech-2306.07501"/></url>
<url><loc>https://scifaro.com/en/abs/statistical-beamformer-exploiting-non-stationarity-and-sparsity-with-spatially-constrained-ica-for-robust-speech-recognition-2306.07562</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/statistical-beamformer-exploiting-non-stationarity-and-sparsity-with-spatially-constrained-ica-for-robust-speech-recognition-2306.07562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/statistical-beamformer-exploiting-non-stationarity-and-sparsity-with-spatially-constrained-ica-for-robust-speech-recognition-2306.07562"/></url>
<url><loc>https://scifaro.com/en/abs/malafide-a-novel-adversarial-convolutive-noise-attack-against-deepfake-and-spoofing-detection-systems-2306.07655</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/malafide-a-novel-adversarial-convolutive-noise-attack-against-deepfake-and-spoofing-detection-systems-2306.07655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/malafide-a-novel-adversarial-convolutive-noise-attack-against-deepfake-and-spoofing-detection-systems-2306.07655"/></url>
<url><loc>https://scifaro.com/en/abs/styletts-2-towards-human-level-text-to-speech-through-style-diffusion-and-adversarial-training-with-large-speech-language-models-2306.07691</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styletts-2-towards-human-level-text-to-speech-through-style-diffusion-and-adversarial-training-with-large-speech-language-models-2306.07691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styletts-2-towards-human-level-text-to-speech-through-style-diffusion-and-adversarial-training-with-large-speech-language-models-2306.07691"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-deep-dynamical-generative-speech-and-noise-models-2306.07820</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-deep-dynamical-generative-speech-and-noise-models-2306.07820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-speech-enhancement-with-deep-dynamical-generative-speech-and-noise-models-2306.07820"/></url>
<url><loc>https://scifaro.com/en/abs/a-theory-of-unsupervised-speech-recognition-2306.07926</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-theory-of-unsupervised-speech-recognition-2306.07926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-theory-of-unsupervised-speech-recognition-2306.07926"/></url>
<url><loc>https://scifaro.com/en/abs/fooctts-generating-arabic-speech-with-acoustic-environment-for-football-commentator-2306.07936</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fooctts-generating-arabic-speech-with-acoustic-environment-for-football-commentator-2306.07936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fooctts-generating-arabic-speech-with-acoustic-environment-for-football-commentator-2306.07936"/></url>
<url><loc>https://scifaro.com/en/abs/speech-to-text-adapter-and-speech-to-entity-retriever-augmented-llms-for-speech-understanding-2306.07944</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-to-text-adapter-and-speech-to-entity-retriever-augmented-llms-for-speech-understanding-2306.07944"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-to-text-adapter-and-speech-to-entity-retriever-augmented-llms-for-speech-understanding-2306.07944"/></url>
<url><loc>https://scifaro.com/en/abs/improving-frame-level-classifier-for-word-timings-with-non-peaky-ctc-in-end-to-end-automatic-speech-recognition-2306.07949</loc><lastmod>2023-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-frame-level-classifier-for-word-timings-with-non-peaky-ctc-in-end-to-end-automatic-speech-recognition-2306.07949"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-frame-level-classifier-for-word-timings-with-non-peaky-ctc-in-end-to-end-automatic-speech-recognition-2306.07949"/></url>
<url><loc>https://scifaro.com/en/abs/cognitive-performance-in-open-plan-office-acoustic-simulations-effects-of-room-acoustics-and-semantics-but-not-spatial-separation-of-sound-sources-2306.08051</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cognitive-performance-in-open-plan-office-acoustic-simulations-effects-of-room-acoustics-and-semantics-but-not-spatial-separation-of-sound-sources-2306.08051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cognitive-performance-in-open-plan-office-acoustic-simulations-effects-of-room-acoustics-and-semantics-but-not-spatial-separation-of-sound-sources-2306.08051"/></url>
<url><loc>https://scifaro.com/en/abs/quantifying-spatial-audio-quality-impairment-2306.08053</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantifying-spatial-audio-quality-impairment-2306.08053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantifying-spatial-audio-quality-impairment-2306.08053"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-adapters-for-giant-speech-models-2306.08131</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-adapters-for-giant-speech-models-2306.08131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-adapters-for-giant-speech-models-2306.08131"/></url>
<url><loc>https://scifaro.com/en/abs/large-scale-language-model-rescoring-on-long-form-data-2306.08133</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-scale-language-model-rescoring-on-long-form-data-2306.08133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-scale-language-model-rescoring-on-long-form-data-2306.08133"/></url>
<url><loc>https://scifaro.com/en/abs/dctx-conformer-dynamic-context-carry-over-for-low-latency-unified-streaming-and-non-streaming-conformer-asr-2306.08175</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dctx-conformer-dynamic-context-carry-over-for-low-latency-unified-streaming-and-non-streaming-conformer-asr-2306.08175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dctx-conformer-dynamic-context-carry-over-for-low-latency-unified-streaming-and-non-streaming-conformer-asr-2306.08175"/></url>
<url><loc>https://scifaro.com/en/abs/feature-normalization-for-fine-tuning-self-supervised-models-in-speech-enhancement-2306.08406</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-normalization-for-fine-tuning-self-supervised-models-in-speech-enhancement-2306.08406"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-normalization-for-fine-tuning-self-supervised-models-in-speech-enhancement-2306.08406"/></url>
<url><loc>https://scifaro.com/en/abs/mcr-data2vec-2-0-improving-self-supervised-speech-pre-training-via-model-level-consistency-regularization-2306.08463</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mcr-data2vec-2-0-improving-self-supervised-speech-pre-training-via-model-level-consistency-regularization-2306.08463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mcr-data2vec-2-0-improving-self-supervised-speech-pre-training-via-model-level-consistency-regularization-2306.08463"/></url>
<url><loc>https://scifaro.com/en/abs/brudex-database-binaural-room-impulse-responses-with-uniformly-distributed-external-microphones-2306.08484</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/brudex-database-binaural-room-impulse-responses-with-uniformly-distributed-external-microphones-2306.08484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/brudex-database-binaural-room-impulse-responses-with-uniformly-distributed-external-microphones-2306.08484"/></url>
<url><loc>https://scifaro.com/en/abs/permutation-invariant-recurrent-neural-networks-for-sound-source-tracking-applications-2306.08510</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/permutation-invariant-recurrent-neural-networks-for-sound-source-tracking-applications-2306.08510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/permutation-invariant-recurrent-neural-networks-for-sound-source-tracking-applications-2306.08510"/></url>
<url><loc>https://scifaro.com/en/abs/scalable-complexity-steered-response-power-mapping-based-on-low-rank-and-sparse-interpolation-2306.08514</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scalable-complexity-steered-response-power-mapping-based-on-low-rank-and-sparse-interpolation-2306.08514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scalable-complexity-steered-response-power-mapping-based-on-low-rank-and-sparse-interpolation-2306.08514"/></url>
<url><loc>https://scifaro.com/en/abs/variance-preserving-based-interpolation-diffusion-models-for-speech-enhancement-2306.08527</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variance-preserving-based-interpolation-diffusion-models-for-speech-enhancement-2306.08527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variance-preserving-based-interpolation-diffusion-models-for-speech-enhancement-2306.08527"/></url>
<url><loc>https://scifaro.com/en/abs/unified-model-for-code-switching-speech-recognition-and-language-identification-based-on-a-concatenated-tokenizer-2306.08753</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-model-for-code-switching-speech-recognition-and-language-identification-based-on-a-concatenated-tokenizer-2306.08753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-model-for-code-switching-speech-recognition-and-language-identification-based-on-a-concatenated-tokenizer-2306.08753"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-active-noise-control-with-exterior-radiation-suppression-based-on-riemannian-optimization-2306.08855</loc><lastmod>2023-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-active-noise-control-with-exterior-radiation-suppression-based-on-riemannian-optimization-2306.08855"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-active-noise-control-with-exterior-radiation-suppression-based-on-riemannian-optimization-2306.08855"/></url>
<url><loc>https://scifaro.com/en/abs/time-domain-wideband-image-source-method-for-spherical-microphone-arrays-2306.09135</loc><lastmod>2023-08-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-domain-wideband-image-source-method-for-spherical-microphone-arrays-2306.09135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-domain-wideband-image-source-method-for-spherical-microphone-arrays-2306.09135"/></url>
<url><loc>https://scifaro.com/en/abs/lexical-speaker-error-correction-leveraging-language-models-for-speaker-diarization-error-correction-2306.09313</loc><lastmod>2023-06-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lexical-speaker-error-correction-leveraging-language-models-for-speaker-diarization-error-correction-2306.09313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lexical-speaker-error-correction-leveraging-language-models-for-speaker-diarization-error-correction-2306.09313"/></url>
<url><loc>https://scifaro.com/en/abs/mfsn-multi-perspective-fusion-search-network-for-pre-training-knowledge-in-speech-emotion-recognition-2306.09361</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfsn-multi-perspective-fusion-search-network-for-pre-training-knowledge-in-speech-emotion-recognition-2306.09361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfsn-multi-perspective-fusion-search-network-for-pre-training-knowledge-in-speech-emotion-recognition-2306.09361"/></url>
<url><loc>https://scifaro.com/en/abs/mobileasr-a-resource-aware-on-device-learning-framework-for-user-voice-personalization-applications-on-mobile-phones-2306.09384</loc><lastmod>2023-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mobileasr-a-resource-aware-on-device-learning-framework-for-user-voice-personalization-applications-on-mobile-phones-2306.09384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mobileasr-a-resource-aware-on-device-learning-framework-for-user-voice-personalization-applications-on-mobile-phones-2306.09384"/></url>
<url><loc>https://scifaro.com/en/abs/diff-ttsg-denoising-probabilistic-integrated-speech-and-gesture-synthesis-2306.09417</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-ttsg-denoising-probabilistic-integrated-speech-and-gesture-synthesis-2306.09417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-ttsg-denoising-probabilistic-integrated-speech-and-gesture-synthesis-2306.09417"/></url>
<url><loc>https://scifaro.com/en/abs/distillation-strategies-for-discriminative-speech-recognition-rescoring-2306.09452</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distillation-strategies-for-discriminative-speech-recognition-rescoring-2306.09452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distillation-strategies-for-discriminative-speech-recognition-rescoring-2306.09452"/></url>
<url><loc>https://scifaro.com/en/abs/mf-pam-accurate-pitch-estimation-through-periodicity-analysis-and-multi-level-feature-fusion-2306.09640</loc><lastmod>2025-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mf-pam-accurate-pitch-estimation-through-periodicity-analysis-and-multi-level-feature-fusion-2306.09640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mf-pam-accurate-pitch-estimation-through-periodicity-analysis-and-multi-level-feature-fusion-2306.09640"/></url>
<url><loc>https://scifaro.com/en/abs/use-of-a-humanoid-robot-for-auditory-psychophysical-testing-2306.09714</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/use-of-a-humanoid-robot-for-auditory-psychophysical-testing-2306.09714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/use-of-a-humanoid-robot-for-auditory-psychophysical-testing-2306.09714"/></url>
<url><loc>https://scifaro.com/en/abs/fall-e-a-foley-sound-synthesis-model-and-strategies-2306.09807</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fall-e-a-foley-sound-synthesis-model-and-strategies-2306.09807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fall-e-a-foley-sound-synthesis-model-and-strategies-2306.09807"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-utility-of-surprisal-from-large-language-models-for-speech-synthesis-prosody-2306.09814</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-utility-of-surprisal-from-large-language-models-for-speech-synthesis-prosody-2306.09814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-utility-of-surprisal-from-large-language-models-for-speech-synthesis-prosody-2306.09814"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourcing-and-evaluating-text-based-audio-retrieval-relevances-2306.09820</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourcing-and-evaluating-text-based-audio-retrieval-relevances-2306.09820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourcing-and-evaluating-text-based-audio-retrieval-relevances-2306.09820"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distillation-for-efficient-audio-visual-video-captioning-2306.09947</loc><lastmod>2023-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distillation-for-efficient-audio-visual-video-captioning-2306.09947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distillation-for-efficient-audio-visual-video-captioning-2306.09947"/></url>
<url><loc>https://scifaro.com/en/abs/taming-diffusion-models-for-music-driven-conducting-motion-generation-2306.10065</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taming-diffusion-models-for-music-driven-conducting-motion-generation-2306.10065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taming-diffusion-models-for-music-driven-conducting-motion-generation-2306.10065"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-caption-fluency-with-automatic-error-correction-2306.10090</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-caption-fluency-with-automatic-error-correction-2306.10090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-caption-fluency-with-automatic-error-correction-2306.10090"/></url>
<url><loc>https://scifaro.com/en/abs/cml-tts-a-multilingual-dataset-for-speech-synthesis-in-low-resource-languages-2306.10097</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cml-tts-a-multilingual-dataset-for-speech-synthesis-in-low-resource-languages-2306.10097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cml-tts-a-multilingual-dataset-for-speech-synthesis-in-low-resource-languages-2306.10097"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-text-to-speech-using-specific-data-and-noise-augmentation-2306.10152</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-text-to-speech-using-specific-data-and-noise-augmentation-2306.10152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-text-to-speech-using-specific-data-and-noise-augmentation-2306.10152"/></url>
<url><loc>https://scifaro.com/en/abs/reliability-and-repeatability-of-iso-3382-3-metrics-based-on-repeated-acoustic-measurements-in-open-plan-offices-2306.10268</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reliability-and-repeatability-of-iso-3382-3-metrics-based-on-repeated-acoustic-measurements-in-open-plan-offices-2306.10268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reliability-and-repeatability-of-iso-3382-3-metrics-based-on-repeated-acoustic-measurements-in-open-plan-offices-2306.10268"/></url>
<url><loc>https://scifaro.com/en/abs/two-simultaneous-talkers-distract-more-than-one-in-simulated-multi-talker-environments-regardless-of-overall-sound-levels-typical-of-open-plan-offices-2306.10269</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-simultaneous-talkers-distract-more-than-one-in-simulated-multi-talker-environments-regardless-of-overall-sound-levels-typical-of-open-plan-offices-2306.10269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-simultaneous-talkers-distract-more-than-one-in-simulated-multi-talker-environments-regardless-of-overall-sound-levels-typical-of-open-plan-offices-2306.10269"/></url>
<url><loc>https://scifaro.com/en/abs/autophonic-loudness-of-singers-in-simulated-room-acoustic-environments-2306.10271</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autophonic-loudness-of-singers-in-simulated-room-acoustic-environments-2306.10271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autophonic-loudness-of-singers-in-simulated-room-acoustic-environments-2306.10271"/></url>
<url><loc>https://scifaro.com/en/abs/channel-spatial-based-few-shot-bird-sound-event-detection-2306.10499</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-spatial-based-few-shot-bird-sound-event-detection-2306.10499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-spatial-based-few-shot-bird-sound-event-detection-2306.10499"/></url>
<url><loc>https://scifaro.com/en/abs/lm-vc-zero-shot-voice-conversion-via-speech-generation-based-on-language-models-2306.10521</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lm-vc-zero-shot-voice-conversion-via-speech-generation-based-on-language-models-2306.10521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lm-vc-zero-shot-voice-conversion-via-speech-generation-based-on-language-models-2306.10521"/></url>
<url><loc>https://scifaro.com/en/abs/surt-2-0-advances-in-transducer-based-multi-talker-speech-recognition-2306.10559</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/surt-2-0-advances-in-transducer-based-multi-talker-speech-recognition-2306.10559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/surt-2-0-advances-in-transducer-based-multi-talker-speech-recognition-2306.10559"/></url>
<url><loc>https://scifaro.com/en/abs/hearing-lips-in-noise-universal-viseme-phoneme-mapping-and-transfer-for-robust-audio-visual-speech-recognition-2306.10563</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearing-lips-in-noise-universal-viseme-phoneme-mapping-and-transfer-for-robust-audio-visual-speech-recognition-2306.10563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearing-lips-in-noise-universal-viseme-phoneme-mapping-and-transfer-for-robust-audio-visual-speech-recognition-2306.10563"/></url>
<url><loc>https://scifaro.com/en/abs/mir-gan-refining-frame-level-modality-invariant-representations-with-adversarial-network-for-audio-visual-speech-recognition-2306.10567</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mir-gan-refining-frame-level-modality-invariant-representations-with-adversarial-network-for-audio-visual-speech-recognition-2306.10567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mir-gan-refining-frame-level-modality-invariant-representations-with-adversarial-network-for-audio-visual-speech-recognition-2306.10567"/></url>
<url><loc>https://scifaro.com/en/abs/duta-vc-a-duration-aware-typical-to-atypical-voice-conversion-approach-with-diffusion-probabilistic-model-2306.10588</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/duta-vc-a-duration-aware-typical-to-atypical-voice-conversion-approach-with-diffusion-probabilistic-model-2306.10588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/duta-vc-a-duration-aware-typical-to-atypical-voice-conversion-approach-with-diffusion-probabilistic-model-2306.10588"/></url>
<url><loc>https://scifaro.com/en/abs/rehearsal-free-online-continual-learning-for-automatic-speech-recognition-2306.10860</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rehearsal-free-online-continual-learning-for-automatic-speech-recognition-2306.10860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rehearsal-free-online-continual-learning-for-automatic-speech-recognition-2306.10860"/></url>
<url><loc>https://scifaro.com/en/abs/ecat-an-end-to-end-model-for-multi-speaker-tts-many-to-many-fine-grained-prosody-transfer-2306.11327</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecat-an-end-to-end-model-for-multi-speaker-tts-many-to-many-fine-grained-prosody-transfer-2306.11327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecat-an-end-to-end-model-for-multi-speaker-tts-many-to-many-fine-grained-prosody-transfer-2306.11327"/></url>
<url><loc>https://scifaro.com/en/abs/visually-grounded-few-shot-word-learning-in-low-resource-settings-2306.11371</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visually-grounded-few-shot-word-learning-in-low-resource-settings-2306.11371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visually-grounded-few-shot-word-learning-in-low-resource-settings-2306.11371"/></url>
<url><loc>https://scifaro.com/en/abs/a-computation-efficient-online-secondary-path-modeling-technique-for-modified-fxlms-algorithm-2306.11408</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computation-efficient-online-secondary-path-modeling-technique-for-modified-fxlms-algorithm-2306.11408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computation-efficient-online-secondary-path-modeling-technique-for-modified-fxlms-algorithm-2306.11408"/></url>
<url><loc>https://scifaro.com/en/abs/auditory-neural-response-inspired-sound-event-detection-based-on-spectro-temporal-receptive-field-2306.11427</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auditory-neural-response-inspired-sound-event-detection-based-on-spectro-temporal-receptive-field-2306.11427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auditory-neural-response-inspired-sound-event-detection-based-on-spectro-temporal-receptive-field-2306.11427"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-neural-representation-with-physics-informed-neural-networks-for-the-reconstruction-of-the-early-part-of-room-impulse-responses-2306.11509</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-neural-representation-with-physics-informed-neural-networks-for-the-reconstruction-of-the-early-part-of-room-impulse-responses-2306.11509"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-neural-representation-with-physics-informed-neural-networks-for-the-reconstruction-of-the-early-part-of-room-impulse-responses-2306.11509"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-prosody-transfer-for-expressive-machine-dubbing-2306.11658</loc><lastmod>2023-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-prosody-transfer-for-expressive-machine-dubbing-2306.11658"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-prosody-transfer-for-expressive-machine-dubbing-2306.11658"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-machine-dubbing-through-phrase-level-cross-lingual-prosody-transfer-2306.11662</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-machine-dubbing-through-phrase-level-cross-lingual-prosody-transfer-2306.11662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-machine-dubbing-through-phrase-level-cross-lingual-prosody-transfer-2306.11662"/></url>
<url><loc>https://scifaro.com/en/abs/on-frequency-wise-normalizations-for-better-recording-device-generalization-in-audio-spectrogram-transformers-2306.11764</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-frequency-wise-normalizations-for-better-recording-device-generalization-in-audio-spectrogram-transformers-2306.11764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-frequency-wise-normalizations-for-better-recording-device-generalization-in-audio-spectrogram-transformers-2306.11764"/></url>
<url><loc>https://scifaro.com/en/abs/learning-when-to-trust-which-teacher-for-weakly-supervised-asr-2306.12012</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-when-to-trust-which-teacher-for-weakly-supervised-asr-2306.12012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-when-to-trust-which-teacher-for-weakly-supervised-asr-2306.12012"/></url>
<url><loc>https://scifaro.com/en/abs/federated-self-learning-with-weak-supervision-for-speech-recognition-2306.12015</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/federated-self-learning-with-weak-supervision-for-speech-recognition-2306.12015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/federated-self-learning-with-weak-supervision-for-speech-recognition-2306.12015"/></url>
<url><loc>https://scifaro.com/en/abs/visual-aware-text-to-speech-2306.12020</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/visual-aware-text-to-speech-2306.12020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/visual-aware-text-to-speech-2306.12020"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-posterior-sampling-for-informed-single-channel-dereverberation-2306.12286</loc><lastmod>2023-06-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-posterior-sampling-for-informed-single-channel-dereverberation-2306.12286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-posterior-sampling-for-informed-single-channel-dereverberation-2306.12286"/></url>
<url><loc>https://scifaro.com/en/abs/depac-a-corpus-for-depression-and-anxiety-detection-from-speech-2306.12443</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/depac-a-corpus-for-depression-and-anxiety-detection-from-speech-2306.12443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/depac-a-corpus-for-depression-and-anxiety-detection-from-speech-2306.12443"/></url>
<url><loc>https://scifaro.com/en/abs/factors-affecting-the-performance-of-automated-speaker-verification-in-alzheimer-s-disease-clinical-trials-2306.12444</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/factors-affecting-the-performance-of-automated-speaker-verification-in-alzheimer-s-disease-clinical-trials-2306.12444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/factors-affecting-the-performance-of-automated-speaker-verification-in-alzheimer-s-disease-clinical-trials-2306.12444"/></url>
<url><loc>https://scifaro.com/en/abs/wind-noise-reduction-with-a-diffusion-based-stochastic-regeneration-model-2306.12867</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wind-noise-reduction-with-a-diffusion-based-stochastic-regeneration-model-2306.12867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wind-noise-reduction-with-a-diffusion-based-stochastic-regeneration-model-2306.12867"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-spoken-language-diarization-2306.12913</loc><lastmod>2023-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-spoken-language-diarization-2306.12913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-spoken-language-diarization-2306.12913"/></url>
<url><loc>https://scifaro.com/en/abs/towards-effective-and-compact-contextual-representation-for-conformer-transducer-speech-recognition-systems-2306.13307</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-effective-and-compact-contextual-representation-for-conformer-transducer-speech-recognition-systems-2306.13307"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-effective-and-compact-contextual-representation-for-conformer-transducer-speech-recognition-systems-2306.13307"/></url>
<url><loc>https://scifaro.com/en/abs/the-chime-7-dasr-challenge-distant-meeting-transcription-with-multiple-devices-in-diverse-scenarios-2306.13734</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-chime-7-dasr-challenge-distant-meeting-transcription-with-multiple-devices-in-diverse-scenarios-2306.13734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-chime-7-dasr-challenge-distant-meeting-transcription-with-multiple-devices-in-diverse-scenarios-2306.13734"/></url>
<url><loc>https://scifaro.com/en/abs/community-detection-graph-convolutional-network-for-overlap-aware-speaker-diarization-2306.14530</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/community-detection-graph-convolutional-network-for-overlap-aware-speaker-diarization-2306.14530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/community-detection-graph-convolutional-network-for-overlap-aware-speaker-diarization-2306.14530"/></url>
<url><loc>https://scifaro.com/en/abs/factorised-speaker-environment-adaptive-training-of-conformer-speech-recognition-systems-2306.14608</loc><lastmod>2023-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/factorised-speaker-environment-adaptive-training-of-conformer-speech-recognition-systems-2306.14608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/factorised-speaker-environment-adaptive-training-of-conformer-speech-recognition-systems-2306.14608"/></url>
<url><loc>https://scifaro.com/en/abs/wespeaker-baselines-for-voxsrc2023-2306.15161</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wespeaker-baselines-for-voxsrc2023-2306.15161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wespeaker-baselines-for-voxsrc2023-2306.15161"/></url>
<url><loc>https://scifaro.com/en/abs/hyper-parameter-adaptation-of-conformer-asr-systems-for-elderly-and-dysarthric-speech-recognition-2306.15265</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyper-parameter-adaptation-of-conformer-asr-systems-for-elderly-and-dysarthric-speech-recognition-2306.15265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyper-parameter-adaptation-of-conformer-asr-systems-for-elderly-and-dysarthric-speech-recognition-2306.15265"/></url>
<url><loc>https://scifaro.com/en/abs/genertts-pronunciation-disentanglement-for-timbre-and-style-generalization-in-cross-lingual-text-to-speech-2306.15304</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/genertts-pronunciation-disentanglement-for-timbre-and-style-generalization-in-cross-lingual-text-to-speech-2306.15304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/genertts-pronunciation-disentanglement-for-timbre-and-style-generalization-in-cross-lingual-text-to-speech-2306.15304"/></url>
<url><loc>https://scifaro.com/en/abs/post-processing-independent-evaluation-of-sound-event-detection-systems-2306.15440</loc><lastmod>2023-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/post-processing-independent-evaluation-of-sound-event-detection-systems-2306.15440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/post-processing-independent-evaluation-of-sound-event-detection-systems-2306.15440"/></url>
<url><loc>https://scifaro.com/en/abs/implementing-contextual-biasing-in-gpu-decoder-for-online-asr-2306.15685</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implementing-contextual-biasing-in-gpu-decoder-for-online-asr-2306.15685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implementing-contextual-biasing-in-gpu-decoder-for-online-asr-2306.15685"/></url>
<url><loc>https://scifaro.com/en/abs/master-asr-achieving-multilingual-scalability-and-low-resource-adaptation-in-asr-with-modular-learning-2306.15686</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/master-asr-achieving-multilingual-scalability-and-low-resource-adaptation-in-asr-with-modular-learning-2306.15686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/master-asr-achieving-multilingual-scalability-and-low-resource-adaptation-in-asr-with-modular-learning-2306.15686"/></url>
<url><loc>https://scifaro.com/en/abs/voicebox-text-guided-multilingual-universal-speech-generation-at-scale-2306.15687</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicebox-text-guided-multilingual-universal-speech-generation-at-scale-2306.15687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicebox-text-guided-multilingual-universal-speech-generation-at-scale-2306.15687"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-laws-for-discriminative-speech-recognition-rescoring-models-2306.15815</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-laws-for-discriminative-speech-recognition-rescoring-models-2306.15815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-laws-for-discriminative-speech-recognition-rescoring-models-2306.15815"/></url>
<url><loc>https://scifaro.com/en/abs/confidence-based-ensembles-of-end-to-end-speech-recognition-models-2306.15824</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/confidence-based-ensembles-of-end-to-end-speech-recognition-models-2306.15824"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/confidence-based-ensembles-of-end-to-end-speech-recognition-models-2306.15824"/></url>
<url><loc>https://scifaro.com/en/abs/two-stage-voice-anonymization-for-enhanced-privacy-2306.16069</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-stage-voice-anonymization-for-enhanced-privacy-2306.16069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-stage-voice-anonymization-for-enhanced-privacy-2306.16069"/></url>
<url><loc>https://scifaro.com/en/abs/long-term-conversation-analysis-exploring-utility-and-privacy-2306.16071</loc><lastmod>2023-06-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-term-conversation-analysis-exploring-utility-and-privacy-2306.16071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-term-conversation-analysis-exploring-utility-and-privacy-2306.16071"/></url>
<url><loc>https://scifaro.com/en/abs/computationally-efficient-and-perceptually-motivated-rendering-of-diffuse-reflections-in-room-acoustics-simulation-2306.16696</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computationally-efficient-and-perceptually-motivated-rendering-of-diffuse-reflections-in-room-acoustics-simulation-2306.16696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computationally-efficient-and-perceptually-motivated-rendering-of-diffuse-reflections-in-room-acoustics-simulation-2306.16696"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-f0-synthesis-for-speaker-anonymization-2306.16860</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-f0-synthesis-for-speaker-anonymization-2306.16860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-f0-synthesis-for-speaker-anonymization-2306.16860"/></url>
<url><loc>https://scifaro.com/en/abs/high-quality-automatic-voice-over-with-accurate-alignment-supervision-through-self-supervised-discrete-speech-units-2306.17005</loc><lastmod>2023-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-quality-automatic-voice-over-with-accurate-alignment-supervision-through-self-supervised-discrete-speech-units-2306.17005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-quality-automatic-voice-over-with-accurate-alignment-supervision-through-self-supervised-discrete-speech-units-2306.17005"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-virtual-acoustic-environments-with-different-acoustic-level-of-detail-2306.17012</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-virtual-acoustic-environments-with-different-acoustic-level-of-detail-2306.17012"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-virtual-acoustic-environments-with-different-acoustic-level-of-detail-2306.17012"/></url>
<url><loc>https://scifaro.com/en/abs/learning-multilingual-expressive-speech-representation-for-prosody-prediction-without-parallel-data-2306.17199</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-multilingual-expressive-speech-representation-for-prosody-prediction-without-parallel-data-2306.17199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-multilingual-expressive-speech-representation-for-prosody-prediction-without-parallel-data-2306.17199"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-synthesis-using-differentiable-lpc-and-glottal-flow-inspired-wavetables-2306.17252</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-synthesis-using-differentiable-lpc-and-glottal-flow-inspired-wavetables-2306.17252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-synthesis-using-differentiable-lpc-and-glottal-flow-inspired-wavetables-2306.17252"/></url>
<url><loc>https://scifaro.com/en/abs/modified-parametric-multichannel-wiener-filter-for-low-latency-enhancement-of-speech-mixtures-with-unknown-number-of-speakers-2306.17317</loc><lastmod>2023-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modified-parametric-multichannel-wiener-filter-for-low-latency-enhancement-of-speech-mixtures-with-unknown-number-of-speakers-2306.17317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modified-parametric-multichannel-wiener-filter-for-low-latency-enhancement-of-speech-mixtures-with-unknown-number-of-speakers-2306.17317"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-neural-on-neural-approaches-to-speaker-gender-protection-2306.17700</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-neural-on-neural-approaches-to-speaker-gender-protection-2306.17700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-neural-on-neural-approaches-to-speaker-gender-protection-2306.17700"/></url>
<url><loc>https://scifaro.com/en/abs/emospeech-guiding-fastspeech2-towards-emotional-text-to-speech-2307.00024</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emospeech-guiding-fastspeech2-towards-emotional-text-to-speech-2307.00024"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emospeech-guiding-fastspeech2-towards-emotional-text-to-speech-2307.00024"/></url>
<url><loc>https://scifaro.com/en/abs/voxwatch-an-open-set-speaker-recognition-benchmark-on-voxceleb-2307.00169</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxwatch-an-open-set-speaker-recognition-benchmark-on-voxceleb-2307.00169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxwatch-an-open-set-speaker-recognition-benchmark-on-voxceleb-2307.00169"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-the-eeg-speech-match-mismatch-tasks-with-word-boundaries-2307.00366</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-the-eeg-speech-match-mismatch-tasks-with-word-boundaries-2307.00366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-the-eeg-speech-match-mismatch-tasks-with-word-boundaries-2307.00366"/></url>
<url><loc>https://scifaro.com/en/abs/using-joint-training-speaker-encoder-with-consistency-loss-to-achieve-cross-lingual-voice-conversion-and-expressive-voice-conversion-2307.00393</loc><lastmod>2023-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-joint-training-speaker-encoder-with-consistency-loss-to-achieve-cross-lingual-voice-conversion-and-expressive-voice-conversion-2307.00393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-joint-training-speaker-encoder-with-consistency-loss-to-achieve-cross-lingual-voice-conversion-and-expressive-voice-conversion-2307.00393"/></url>
<url><loc>https://scifaro.com/en/abs/disentanglement-in-a-gan-for-unconditional-speech-synthesis-2307.01673</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentanglement-in-a-gan-for-unconditional-speech-synthesis-2307.01673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentanglement-in-a-gan-for-unconditional-speech-synthesis-2307.01673"/></url>
<url><loc>https://scifaro.com/en/abs/flowchase-a-mobile-application-for-pronunciation-training-2307.02051</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowchase-a-mobile-application-for-pronunciation-training-2307.02051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowchase-a-mobile-application-for-pronunciation-training-2307.02051"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-multilingual-transfer-for-unsupervised-semantic-acoustic-word-embeddings-2307.02083</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-multilingual-transfer-for-unsupervised-semantic-acoustic-word-embeddings-2307.02083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-multilingual-transfer-for-unsupervised-semantic-acoustic-word-embeddings-2307.02083"/></url>
<url><loc>https://scifaro.com/en/abs/a-database-with-directivities-of-musical-instruments-2307.02110</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-database-with-directivities-of-musical-instruments-2307.02110"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-database-with-directivities-of-musical-instruments-2307.02110"/></url>
<url><loc>https://scifaro.com/en/abs/differentially-private-adversarial-auto-encoder-to-protect-gender-in-voice-biometrics-2307.02135</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentially-private-adversarial-auto-encoder-to-protect-gender-in-voice-biometrics-2307.02135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentially-private-adversarial-auto-encoder-to-protect-gender-in-voice-biometrics-2307.02135"/></url>
<url><loc>https://scifaro.com/en/abs/why-can-big-bi-be-changed-to-bi-gbi-a-mathematical-model-of-syllabification-and-articulatory-synthesis-2307.02299</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-can-big-bi-be-changed-to-bi-gbi-a-mathematical-model-of-syllabification-and-articulatory-synthesis-2307.02299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-can-big-bi-be-changed-to-bi-gbi-a-mathematical-model-of-syllabification-and-articulatory-synthesis-2307.02299"/></url>
<url><loc>https://scifaro.com/en/abs/online-hybrid-ctc-attention-end-to-end-automatic-speech-recognition-architecture-2307.02351</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-hybrid-ctc-attention-end-to-end-automatic-speech-recognition-architecture-2307.02351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-hybrid-ctc-attention-end-to-end-automatic-speech-recognition-architecture-2307.02351"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-synthesis-from-mri-based-articulatory-representations-2307.02471</loc><lastmod>2023-07-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-mri-based-articulatory-representations-2307.02471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-mri-based-articulatory-representations-2307.02471"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-multimodal-approaches-for-alzheimer-s-disease-detection-using-patient-speech-transcript-and-audio-data-2307.02514</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-multimodal-approaches-for-alzheimer-s-disease-detection-using-patient-speech-transcript-and-audio-data-2307.02514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-multimodal-approaches-for-alzheimer-s-disease-detection-using-patient-speech-transcript-and-audio-data-2307.02514"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-end-to-end-multi-channel-speech-separation-dereverberation-and-recognition-2307.02909</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-end-to-end-multi-channel-speech-separation-dereverberation-and-recognition-2307.02909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-end-to-end-multi-channel-speech-separation-dereverberation-and-recognition-2307.02909"/></url>
<url><loc>https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-end-to-end-asr-2307.03088</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-end-to-end-asr-2307.03088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-end-to-end-asr-2307.03088"/></url>
<url><loc>https://scifaro.com/en/abs/recovering-implicit-pitch-contours-from-formants-in-whispered-speech-2307.03168</loc><lastmod>2023-07-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recovering-implicit-pitch-contours-from-formants-in-whispered-speech-2307.03168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recovering-implicit-pitch-contours-from-formants-in-whispered-speech-2307.03168"/></url>
<url><loc>https://scifaro.com/en/abs/gammatonegram-representation-for-end-to-end-dysarthric-speech-processing-tasks-speech-recognition-speaker-identification-and-intelligibility-assessment-2307.03296</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gammatonegram-representation-for-end-to-end-dysarthric-speech-processing-tasks-speech-recognition-speaker-identification-and-intelligibility-assessment-2307.03296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gammatonegram-representation-for-end-to-end-dysarthric-speech-processing-tasks-speech-recognition-speaker-identification-and-intelligibility-assessment-2307.03296"/></url>
<url><loc>https://scifaro.com/en/abs/on-decoder-only-architecture-for-speech-to-text-and-large-language-model-integration-2307.03917</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-decoder-only-architecture-for-speech-to-text-and-large-language-model-integration-2307.03917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-decoder-only-architecture-for-speech-to-text-and-large-language-model-integration-2307.03917"/></url>
<url><loc>https://scifaro.com/en/abs/ians-intelligibility-aware-null-steering-beamforming-for-dual-microphone-arrays-2307.04179</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ians-intelligibility-aware-null-steering-beamforming-for-dual-microphone-arrays-2307.04179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ians-intelligibility-aware-null-steering-beamforming-for-dual-microphone-arrays-2307.04179"/></url>
<url><loc>https://scifaro.com/en/abs/a-demand-driven-perspective-on-generative-audio-ai-2307.04292</loc><lastmod>2023-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-demand-driven-perspective-on-generative-audio-ai-2307.04292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-demand-driven-perspective-on-generative-audio-ai-2307.04292"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-an-external-microphone-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2307.04460</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-an-external-microphone-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2307.04460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-an-external-microphone-for-binaural-rtf-vector-based-direction-of-arrival-estimation-for-multiple-speakers-2307.04460"/></url>
<url><loc>https://scifaro.com/en/abs/study-on-the-correlation-between-objective-evaluations-and-subjective-speech-quality-and-intelligibility-2307.04517</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-on-the-correlation-between-objective-evaluations-and-subjective-speech-quality-and-intelligibility-2307.04517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-on-the-correlation-between-objective-evaluations-and-subjective-speech-quality-and-intelligibility-2307.04517"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-transfer-using-image-to-image-denoising-diffusion-implicit-models-2307.04586</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-transfer-using-image-to-image-denoising-diffusion-implicit-models-2307.04586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-transfer-using-image-to-image-denoising-diffusion-implicit-models-2307.04586"/></url>
<url><loc>https://scifaro.com/en/abs/behavioral-analysis-of-pathological-speaker-embeddings-of-patients-during-oncological-treatment-of-oral-cancer-2307.04744</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/behavioral-analysis-of-pathological-speaker-embeddings-of-patients-during-oncological-treatment-of-oral-cancer-2307.04744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/behavioral-analysis-of-pathological-speaker-embeddings-of-patients-during-oncological-treatment-of-oral-cancer-2307.04744"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-tuberculosis-from-real-world-cough-audio-recordings-and-metadata-2307.04842</loc><lastmod>2026-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-tuberculosis-from-real-world-cough-audio-recordings-and-metadata-2307.04842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-tuberculosis-from-real-world-cough-audio-recordings-and-metadata-2307.04842"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-self-supervised-speech-representations-in-spontaneous-speech-synthesis-2307.05132</loc><lastmod>2023-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-self-supervised-speech-representations-in-spontaneous-speech-synthesis-2307.05132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-self-supervised-speech-representations-in-spontaneous-speech-synthesis-2307.05132"/></url>
<url><loc>https://scifaro.com/en/abs/anti-noise-window-subjective-perception-of-active-noise-reduction-and-effect-of-informational-masking-2307.05533</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anti-noise-window-subjective-perception-of-active-noise-reduction-and-effect-of-informational-masking-2307.05533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anti-noise-window-subjective-perception-of-active-noise-reduction-and-effect-of-informational-masking-2307.05533"/></url>
<url><loc>https://scifaro.com/en/abs/speech-diarization-and-asr-with-gmm-2307.05637</loc><lastmod>2024-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-diarization-and-asr-with-gmm-2307.05637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-diarization-and-asr-with-gmm-2307.05637"/></url>
<url><loc>https://scifaro.com/en/abs/point-to-the-hidden-exposing-speech-audio-splicing-via-signal-pointer-nets-2307.05641</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/point-to-the-hidden-exposing-speech-audio-splicing-via-signal-pointer-nets-2307.05641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/point-to-the-hidden-exposing-speech-audio-splicing-via-signal-pointer-nets-2307.05641"/></url>
<url><loc>https://scifaro.com/en/abs/rhythm-modeling-for-voice-conversion-2307.06040</loc><lastmod>2023-07-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rhythm-modeling-for-voice-conversion-2307.06040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rhythm-modeling-for-voice-conversion-2307.06040"/></url>
<url><loc>https://scifaro.com/en/abs/global-birdsong-embeddings-enable-superior-transfer-learning-for-bioacoustic-classification-2307.06292</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/global-birdsong-embeddings-enable-superior-transfer-learning-for-bioacoustic-classification-2307.06292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/global-birdsong-embeddings-enable-superior-transfer-learning-for-bioacoustic-classification-2307.06292"/></url>
<url><loc>https://scifaro.com/en/abs/lace-a-light-weight-causal-model-for-enhancing-coded-speech-through-adaptive-convolutions-2307.06610</loc><lastmod>2023-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lace-a-light-weight-causal-model-for-enhancing-coded-speech-through-adaptive-convolutions-2307.06610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lace-a-light-weight-causal-model-for-enhancing-coded-speech-through-adaptive-convolutions-2307.06610"/></url>
<url><loc>https://scifaro.com/en/abs/an-improved-metric-of-informational-masking-for-perceptual-audio-quality-measurement-2307.06656</loc><lastmod>2023-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-improved-metric-of-informational-masking-for-perceptual-audio-quality-measurement-2307.06656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-improved-metric-of-informational-masking-for-perceptual-audio-quality-measurement-2307.06656"/></url>
<url><loc>https://scifaro.com/en/abs/personalization-for-bert-based-discriminative-speech-recognition-rescoring-2307.06832</loc><lastmod>2023-07-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalization-for-bert-based-discriminative-speech-recognition-rescoring-2307.06832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalization-for-bert-based-discriminative-speech-recognition-rescoring-2307.06832"/></url>
<url><loc>https://scifaro.com/en/abs/controllable-emphasis-with-zero-data-for-text-to-speech-2307.07062</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controllable-emphasis-with-zero-data-for-text-to-speech-2307.07062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controllable-emphasis-with-zero-data-for-text-to-speech-2307.07062"/></url>
<url><loc>https://scifaro.com/en/abs/low-rank-properties-for-estimating-microphones-start-time-and-sources-emission-time-2307.07096</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-rank-properties-for-estimating-microphones-start-time-and-sources-emission-time-2307.07096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-rank-properties-for-estimating-microphones-start-time-and-sources-emission-time-2307.07096"/></url>
<url><loc>https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-spherical-listening-region-2307.07200</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-spherical-listening-region-2307.07200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-spherical-listening-region-2307.07200"/></url>
<url><loc>https://scifaro.com/en/abs/mega-tts-2-boosting-prompting-mechanisms-for-zero-shot-speech-synthesis-2307.07218</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mega-tts-2-boosting-prompting-mechanisms-for-zero-shot-speech-synthesis-2307.07218"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mega-tts-2-boosting-prompting-mechanisms-for-zero-shot-speech-synthesis-2307.07218"/></url>
<url><loc>https://scifaro.com/en/abs/representation-learning-with-hidden-unit-clustering-for-low-resource-speech-applications-2307.07325</loc><lastmod>2023-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-learning-with-hidden-unit-clustering-for-low-resource-speech-applications-2307.07325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-learning-with-hidden-unit-clustering-for-low-resource-speech-applications-2307.07325"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-self-supervised-audio-visual-pretrained-models-to-improve-vocoded-speech-intelligibility-in-cochlear-implant-simulation-2307.07748</loc><lastmod>2025-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-self-supervised-audio-visual-pretrained-models-to-improve-vocoded-speech-intelligibility-in-cochlear-implant-simulation-2307.07748"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-self-supervised-audio-visual-pretrained-models-to-improve-vocoded-speech-intelligibility-in-cochlear-implant-simulation-2307.07748"/></url>
<url><loc>https://scifaro.com/en/abs/model-adaptation-for-asr-in-low-resource-indian-languages-2307.07948</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/model-adaptation-for-asr-in-low-resource-indian-languages-2307.07948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/model-adaptation-for-asr-in-low-resource-indian-languages-2307.07948"/></url>
<url><loc>https://scifaro.com/en/abs/noise-aware-speech-enhancement-using-diffusion-probabilistic-model-2307.08029</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-aware-speech-enhancement-using-diffusion-probabilistic-model-2307.08029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-aware-speech-enhancement-using-diffusion-probabilistic-model-2307.08029"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-binary-classification-loss-for-speaker-verification-2307.08205</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-binary-classification-loss-for-speaker-verification-2307.08205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-binary-classification-loss-for-speaker-verification-2307.08205"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-large-language-model-with-speech-for-fully-formatted-end-to-end-speech-recognition-2307.08234</loc><lastmod>2023-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-large-language-model-with-speech-for-fully-formatted-end-to-end-speech-recognition-2307.08234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-large-language-model-with-speech-for-fully-formatted-end-to-end-speech-recognition-2307.08234"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-kernel-convolution-network-with-scene-dedicate-training-for-sound-event-localization-and-detection-2307.08239</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-kernel-convolution-network-with-scene-dedicate-training-for-sound-event-localization-and-detection-2307.08239"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-kernel-convolution-network-with-scene-dedicate-training-for-sound-event-localization-and-detection-2307.08239"/></url>
<url><loc>https://scifaro.com/en/abs/vocoder-drift-compensation-by-x-vector-alignment-in-speaker-anonymisation-2307.08403</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vocoder-drift-compensation-by-x-vector-alignment-in-speaker-anonymisation-2307.08403"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vocoder-drift-compensation-by-x-vector-alignment-in-speaker-anonymisation-2307.08403"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-multi-channel-speaker-diarization-with-cross-channel-attention-2307.08688</loc><lastmod>2023-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-multi-channel-speaker-diarization-with-cross-channel-attention-2307.08688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-multi-channel-speaker-diarization-with-cross-channel-attention-2307.08688"/></url>
<url><loc>https://scifaro.com/en/abs/ivrit-ai-a-comprehensive-dataset-of-hebrew-speech-for-ai-research-and-development-2307.08720</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ivrit-ai-a-comprehensive-dataset-of-hebrew-speech-for-ai-research-and-development-2307.08720"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ivrit-ai-a-comprehensive-dataset-of-hebrew-speech-for-ai-research-and-development-2307.08720"/></url>
<url><loc>https://scifaro.com/en/abs/low-bit-rate-binaural-link-for-improved-ultra-low-latency-low-complexity-multichannel-speech-enhancement-in-hearing-aids-2307.08858</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-bit-rate-binaural-link-for-improved-ultra-low-latency-low-complexity-multichannel-speech-enhancement-in-hearing-aids-2307.08858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-bit-rate-binaural-link-for-improved-ultra-low-latency-low-complexity-multichannel-speech-enhancement-in-hearing-aids-2307.08858"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-representation-of-head-related-transfer-functions-in-continuous-space-frequency-domains-2307.09352</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-representation-of-head-related-transfer-functions-in-continuous-space-frequency-domains-2307.09352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-representation-of-head-related-transfer-functions-in-continuous-space-frequency-domains-2307.09352"/></url>
<url><loc>https://scifaro.com/en/abs/slmgan-exploiting-speech-language-model-representations-for-unsupervised-zero-shot-voice-conversion-in-gans-2307.09435</loc><lastmod>2023-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slmgan-exploiting-speech-language-model-representations-for-unsupervised-zero-shot-voice-conversion-in-gans-2307.09435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slmgan-exploiting-speech-language-model-representations-for-unsupervised-zero-shot-voice-conversion-in-gans-2307.09435"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-acoustic-word-embedding-learning-via-correspondence-transformer-encoder-2307.09871</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-acoustic-word-embedding-learning-via-correspondence-transformer-encoder-2307.09871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-acoustic-word-embedding-learning-via-correspondence-transformer-encoder-2307.09871"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-on-the-effects-of-speaker-embedding-choice-in-non-auto-regressive-tts-2307.09898</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-on-the-effects-of-speaker-embedding-choice-in-non-auto-regressive-tts-2307.09898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-on-the-effects-of-speaker-embedding-choice-in-non-auto-regressive-tts-2307.09898"/></url>
<url><loc>https://scifaro.com/en/abs/alzheimer-s-disease-detection-from-spontaneous-speech-and-text-a-review-2307.10005</loc><lastmod>2023-07-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alzheimer-s-disease-detection-from-spontaneous-speech-and-text-a-review-2307.10005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alzheimer-s-disease-detection-from-spontaneous-speech-and-text-a-review-2307.10005"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-domain-sensitive-speech-recognition-with-prompt-conditioning-fine-tuning-2307.10274</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-domain-sensitive-speech-recognition-with-prompt-conditioning-fine-tuning-2307.10274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-domain-sensitive-speech-recognition-with-prompt-conditioning-fine-tuning-2307.10274"/></url>
<url><loc>https://scifaro.com/en/abs/pas-partial-additive-speech-data-augmentation-method-for-noise-robust-speaker-verification-2307.10628</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pas-partial-additive-speech-data-augmentation-method-for-noise-robust-speaker-verification-2307.10628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pas-partial-additive-speech-data-augmentation-method-for-noise-robust-speaker-verification-2307.10628"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-and-bias-correction-with-pre-trained-audio-embeddings-2307.10834</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-and-bias-correction-with-pre-trained-audio-embeddings-2307.10834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-and-bias-correction-with-pre-trained-audio-embeddings-2307.10834"/></url>
<url><loc>https://scifaro.com/en/abs/globally-normalising-the-transducer-for-streaming-speech-recognition-2307.10975</loc><lastmod>2023-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/globally-normalising-the-transducer-for-streaming-speech-recognition-2307.10975"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/globally-normalising-the-transducer-for-streaming-speech-recognition-2307.10975"/></url>
<url><loc>https://scifaro.com/en/abs/topic-identification-for-spontaneous-speech-enriching-audio-features-with-embedded-linguistic-information-2307.11450</loc><lastmod>2023-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topic-identification-for-spontaneous-speech-enriching-audio-features-with-embedded-linguistic-information-2307.11450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topic-identification-for-spontaneous-speech-enriching-audio-features-with-embedded-linguistic-information-2307.11450"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-large-language-models-with-speech-recognition-abilities-2307.11795</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-large-language-models-with-speech-recognition-abilities-2307.11795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-large-language-models-with-speech-recognition-abilities-2307.11795"/></url>
<url><loc>https://scifaro.com/en/abs/self-refining-of-pseudo-labels-for-music-source-separation-with-noisy-labeled-data-2307.12576</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-refining-of-pseudo-labels-for-music-source-separation-with-noisy-labeled-data-2307.12576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-refining-of-pseudo-labels-for-music-source-separation-with-noisy-labeled-data-2307.12576"/></url>
<url><loc>https://scifaro.com/en/abs/integration-of-frame-and-label-synchronous-beam-search-for-streaming-encoder-decoder-speech-recognition-2307.12767</loc><lastmod>2023-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integration-of-frame-and-label-synchronous-beam-search-for-streaming-encoder-decoder-speech-recognition-2307.12767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integration-of-frame-and-label-synchronous-beam-search-for-streaming-encoder-decoder-speech-recognition-2307.12767"/></url>
<url><loc>https://scifaro.com/en/abs/iteratta-an-interface-for-exploring-both-text-prompts-and-audio-priors-in-generating-music-with-text-to-audio-models-2307.13005</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iteratta-an-interface-for-exploring-both-text-prompts-and-audio-priors-in-generating-music-with-text-to-audio-models-2307.13005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iteratta-an-interface-for-exploring-both-text-prompts-and-audio-priors-in-generating-music-with-text-to-audio-models-2307.13005"/></url>
<url><loc>https://scifaro.com/en/abs/adaptation-of-whisper-models-to-child-speech-recognition-2307.13008</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptation-of-whisper-models-to-child-speech-recognition-2307.13008"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptation-of-whisper-models-to-child-speech-recognition-2307.13008"/></url>
<url><loc>https://scifaro.com/en/abs/on-device-speaker-anonymization-of-acoustic-embeddings-for-asr-based-onflexible-location-gradient-reversal-layer-2307.13343</loc><lastmod>2023-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-device-speaker-anonymization-of-acoustic-embeddings-for-asr-based-onflexible-location-gradient-reversal-layer-2307.13343"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-device-speaker-anonymization-of-acoustic-embeddings-for-asr-based-onflexible-location-gradient-reversal-layer-2307.13343"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-interactions-between-target-positive-and-negative-information-for-acoustic-echo-cancellation-2307.13888</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-interactions-between-target-positive-and-negative-information-for-acoustic-echo-cancellation-2307.13888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-interactions-between-target-positive-and-negative-information-for-acoustic-echo-cancellation-2307.13888"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-quality-enhancement-of-sound-field-synthesis-based-on-combination-of-pressure-and-amplitude-matching-2307.13941</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-quality-enhancement-of-sound-field-synthesis-based-on-combination-of-pressure-and-amplitude-matching-2307.13941"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-quality-enhancement-of-sound-field-synthesis-based-on-combination-of-pressure-and-amplitude-matching-2307.13941"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-estimation-around-a-rigid-sphere-with-physics-informed-neural-network-2307.14013</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-estimation-around-a-rigid-sphere-with-physics-informed-neural-network-2307.14013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-estimation-around-a-rigid-sphere-with-physics-informed-neural-network-2307.14013"/></url>
<url><loc>https://scifaro.com/en/abs/neuroheed-neuro-steered-speaker-extraction-using-eeg-signals-2307.14303</loc><lastmod>2023-07-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuroheed-neuro-steered-speaker-extraction-using-eeg-signals-2307.14303"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuroheed-neuro-steered-speaker-extraction-using-eeg-signals-2307.14303"/></url>
<url><loc>https://scifaro.com/en/abs/diff-e-diffusion-based-learning-for-decoding-imagined-speech-eeg-2307.14389</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-e-diffusion-based-learning-for-decoding-imagined-speech-eeg-2307.14389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-e-diffusion-based-learning-for-decoding-imagined-speech-eeg-2307.14389"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-spoken-language-on-speech-enhancement-using-self-supervised-speech-representation-loss-functions-2307.14502</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-spoken-language-on-speech-enhancement-using-self-supervised-speech-representation-loss-functions-2307.14502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-spoken-language-on-speech-enhancement-using-self-supervised-speech-representation-loss-functions-2307.14502"/></url>
<url><loc>https://scifaro.com/en/abs/mitigating-cross-database-differences-for-learning-unified-hrtf-representation-2307.14547</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mitigating-cross-database-differences-for-learning-unified-hrtf-representation-2307.14547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mitigating-cross-database-differences-for-learning-unified-hrtf-representation-2307.14547"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-upsampling-of-head-related-transfer-functions-using-a-physics-informed-neural-network-2307.14650</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-upsampling-of-head-related-transfer-functions-using-a-physics-informed-neural-network-2307.14650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-upsampling-of-head-related-transfer-functions-using-a-physics-informed-neural-network-2307.14650"/></url>
<url><loc>https://scifaro.com/en/abs/audio-inputs-for-active-speaker-detection-and-localization-via-microphone-array-2307.14739</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-inputs-for-active-speaker-detection-and-localization-via-microphone-array-2307.14739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-inputs-for-active-speaker-detection-and-localization-via-microphone-array-2307.14739"/></url>
<url><loc>https://scifaro.com/en/abs/emotion4midi-a-lyrics-based-emotion-labeled-symbolic-music-dataset-2307.14783</loc><lastmod>2023-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion4midi-a-lyrics-based-emotion-labeled-symbolic-music-dataset-2307.14783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion4midi-a-lyrics-based-emotion-labeled-symbolic-music-dataset-2307.14783"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-children-abuse-by-voice-and-audio-classification-by-short-time-fourier-transform-machine-learning-implemented-on-nvidia-edge-gpu-device-2307.15101</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-children-abuse-by-voice-and-audio-classification-by-short-time-fourier-transform-machine-learning-implemented-on-nvidia-edge-gpu-device-2307.15101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-children-abuse-by-voice-and-audio-classification-by-short-time-fourier-transform-machine-learning-implemented-on-nvidia-edge-gpu-device-2307.15101"/></url>
<url><loc>https://scifaro.com/en/abs/pcnn-a-lightweight-parallel-conformer-neural-network-for-efficient-monaural-speech-enhancement-2307.15251</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pcnn-a-lightweight-parallel-conformer-neural-network-for-efficient-monaural-speech-enhancement-2307.15251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pcnn-a-lightweight-parallel-conformer-neural-network-for-efficient-monaural-speech-enhancement-2307.15251"/></url>
<url><loc>https://scifaro.com/en/abs/a-time-frequency-generative-adversarial-based-method-for-audio-packet-loss-concealment-2307.15611</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-time-frequency-generative-adversarial-based-method-for-audio-packet-loss-concealment-2307.15611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-time-frequency-generative-adversarial-based-method-for-audio-packet-loss-concealment-2307.15611"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-acoustic-echo-suppression-with-condition-aware-training-2307.15630</loc><lastmod>2023-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-acoustic-echo-suppression-with-condition-aware-training-2307.15630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-acoustic-echo-suppression-with-condition-aware-training-2307.15630"/></url>
<url><loc>https://scifaro.com/en/abs/metts-multilingual-emotional-text-to-speech-by-cross-speaker-and-cross-lingual-emotion-transfer-2307.15951</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metts-multilingual-emotional-text-to-speech-by-cross-speaker-and-cross-lingual-emotion-transfer-2307.15951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metts-multilingual-emotional-text-to-speech-by-cross-speaker-and-cross-lingual-emotion-transfer-2307.15951"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-end-to-end-asr-models-with-augmented-speech-samples-queried-by-text-2307.16332</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-end-to-end-asr-models-with-augmented-speech-samples-queried-by-text-2307.16332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-end-to-end-asr-models-with-augmented-speech-samples-queried-by-text-2307.16332"/></url>
<url><loc>https://scifaro.com/en/abs/robust-self-supervised-speech-embeddings-for-child-adult-classification-in-interactions-involving-children-with-autism-2307.16398</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-self-supervised-speech-embeddings-for-child-adult-classification-in-interactions-involving-children-with-autism-2307.16398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-self-supervised-speech-embeddings-for-child-adult-classification-in-interactions-involving-children-with-autism-2307.16398"/></url>
<url><loc>https://scifaro.com/en/abs/all-in-one-metrical-and-functional-structure-analysis-with-neighborhood-attentions-on-demixed-audio-2307.16425</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/all-in-one-metrical-and-functional-structure-analysis-with-neighborhood-attentions-on-demixed-audio-2307.16425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/all-in-one-metrical-and-functional-structure-analysis-with-neighborhood-attentions-on-demixed-audio-2307.16425"/></url>
<url><loc>https://scifaro.com/en/abs/samba-speech-enhancement-with-asynchronous-ad-hoc-microphone-arrays-2307.16582</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/samba-speech-enhancement-with-asynchronous-ad-hoc-microphone-arrays-2307.16582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/samba-speech-enhancement-with-asynchronous-ad-hoc-microphone-arrays-2307.16582"/></url>
<url><loc>https://scifaro.com/en/abs/improving-grapheme-to-phoneme-conversion-by-learning-pronunciations-from-speech-recordings-2307.16643</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-grapheme-to-phoneme-conversion-by-learning-pronunciations-from-speech-recordings-2307.16643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-grapheme-to-phoneme-conversion-by-learning-pronunciations-from-speech-recordings-2307.16643"/></url>
<url><loc>https://scifaro.com/en/abs/comparing-normalizing-flows-and-diffusion-models-for-prosody-and-acoustic-modelling-in-text-to-speech-2307.16679</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparing-normalizing-flows-and-diffusion-models-for-prosody-and-acoustic-modelling-in-text-to-speech-2307.16679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparing-normalizing-flows-and-diffusion-models-for-prosody-and-acoustic-modelling-in-text-to-speech-2307.16679"/></url>
<url><loc>https://scifaro.com/en/abs/an-enhanced-system-for-the-detection-and-active-cancellation-of-snoring-signals-2307.16809</loc><lastmod>2023-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-enhanced-system-for-the-detection-and-active-cancellation-of-snoring-signals-2307.16809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-enhanced-system-for-the-detection-and-active-cancellation-of-snoring-signals-2307.16809"/></url>
<url><loc>https://scifaro.com/en/abs/speech-representation-learning-learning-bidirectional-encoders-with-single-view-multi-view-and-multi-task-methods-2308.00129</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-representation-learning-learning-bidirectional-encoders-with-single-view-multi-view-and-multi-task-methods-2308.00129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-representation-learning-learning-bidirectional-encoders-with-single-view-multi-view-and-multi-task-methods-2308.00129"/></url>
<url><loc>https://scifaro.com/en/abs/the-role-of-vowel-and-consonant-onsets-in-neural-tracking-of-natural-speech-2308.00161</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-role-of-vowel-and-consonant-onsets-in-neural-tracking-of-natural-speech-2308.00161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-role-of-vowel-and-consonant-onsets-in-neural-tracking-of-natural-speech-2308.00161"/></url>
<url><loc>https://scifaro.com/en/abs/circumvent-spherical-bessel-function-nulls-for-open-sphere-microphone-arrays-with-physics-informed-neural-network-2308.00242</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/circumvent-spherical-bessel-function-nulls-for-open-sphere-microphone-arrays-with-physics-informed-neural-network-2308.00242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/circumvent-spherical-bessel-function-nulls-for-open-sphere-microphone-arrays-with-physics-informed-neural-network-2308.00242"/></url>
<url><loc>https://scifaro.com/en/abs/generative-adversarial-networks-with-physical-sound-field-priors-2308.00426</loc><lastmod>2023-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-adversarial-networks-with-physical-sound-field-priors-2308.00426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-adversarial-networks-with-physical-sound-field-priors-2308.00426"/></url>
<url><loc>https://scifaro.com/en/abs/myvoice-arabic-speech-resource-collaboration-platform-2308.02503</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/myvoice-arabic-speech-resource-collaboration-platform-2308.02503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/myvoice-arabic-speech-resource-collaboration-platform-2308.02503"/></url>
<url><loc>https://scifaro.com/en/abs/choir-transformer-generating-polyphonic-music-with-relative-attention-on-transformer-2308.02531</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/choir-transformer-generating-polyphonic-music-with-relative-attention-on-transformer-2308.02531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/choir-transformer-generating-polyphonic-music-with-relative-attention-on-transformer-2308.02531"/></url>
<url><loc>https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2308.02774</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2308.02774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2308.02774"/></url>
<url><loc>https://scifaro.com/en/abs/investigation-of-self-supervised-pre-trained-models-for-classification-of-voice-quality-from-speech-and-neck-surface-accelerometer-signals-2308.03226</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigation-of-self-supervised-pre-trained-models-for-classification-of-voice-quality-from-speech-and-neck-surface-accelerometer-signals-2308.03226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigation-of-self-supervised-pre-trained-models-for-classification-of-voice-quality-from-speech-and-neck-surface-accelerometer-signals-2308.03226"/></url>
<url><loc>https://scifaro.com/en/abs/audiovmaf-audio-quality-prediction-with-vmaf-2308.03437</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovmaf-audio-quality-prediction-with-vmaf-2308.03437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovmaf-audio-quality-prediction-with-vmaf-2308.03437"/></url>
<url><loc>https://scifaro.com/en/abs/active-noise-control-based-on-the-momentum-multichannel-normalized-filtered-x-least-mean-square-algorithm-2308.03684</loc><lastmod>2023-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/active-noise-control-based-on-the-momentum-multichannel-normalized-filtered-x-least-mean-square-algorithm-2308.03684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/active-noise-control-based-on-the-momentum-multichannel-normalized-filtered-x-least-mean-square-algorithm-2308.03684"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-extraction-with-conditional-diffusion-model-2308.03987</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-extraction-with-conditional-diffusion-model-2308.03987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-extraction-with-conditional-diffusion-model-2308.03987"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-speaker-embedding-disentanglement-on-natural-read-speech-2308.04225</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-speaker-embedding-disentanglement-on-natural-read-speech-2308.04225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-speaker-embedding-disentanglement-on-natural-read-speech-2308.04225"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-natural-language-based-audio-retrieval-with-passt-and-large-audio-caption-data-sets-2308.04258</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-natural-language-based-audio-retrieval-with-passt-and-large-audio-caption-data-sets-2308.04258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-natural-language-based-audio-retrieval-with-passt-and-large-audio-caption-data-sets-2308.04258"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-the-wav2vec-2-0-feature-extractor-2308.04286</loc><lastmod>2023-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-the-wav2vec-2-0-feature-extractor-2308.04286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-the-wav2vec-2-0-feature-extractor-2308.04286"/></url>
<url><loc>https://scifaro.com/en/abs/separate-anything-you-describe-2308.05037</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-anything-you-describe-2308.05037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-anything-you-describe-2308.05037"/></url>
<url><loc>https://scifaro.com/en/abs/stabilizing-training-with-soft-dynamic-time-warping-a-case-study-for-pitch-class-estimation-with-weakly-aligned-targets-2308.05429</loc><lastmod>2023-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stabilizing-training-with-soft-dynamic-time-warping-a-case-study-for-pitch-class-estimation-with-weakly-aligned-targets-2308.05429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stabilizing-training-with-soft-dynamic-time-warping-a-case-study-for-pitch-class-estimation-with-weakly-aligned-targets-2308.05429"/></url>
<url><loc>https://scifaro.com/en/abs/bilingual-streaming-asr-with-grapheme-units-and-auxiliary-monolingual-loss-2308.06327</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bilingual-streaming-asr-with-grapheme-units-and-auxiliary-monolingual-loss-2308.06327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bilingual-streaming-asr-with-grapheme-units-and-auxiliary-monolingual-loss-2308.06327"/></url>
<url><loc>https://scifaro.com/en/abs/knowledge-distilled-ensemble-model-for-semg-based-silent-speech-interface-2308.06533</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knowledge-distilled-ensemble-model-for-semg-based-silent-speech-interface-2308.06533"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knowledge-distilled-ensemble-model-for-semg-based-silent-speech-interface-2308.06533"/></url>
<url><loc>https://scifaro.com/en/abs/alternative-pseudo-labeling-for-semi-supervised-automatic-speech-recognition-2308.06547</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alternative-pseudo-labeling-for-semi-supervised-automatic-speech-recognition-2308.06547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alternative-pseudo-labeling-for-semi-supervised-automatic-speech-recognition-2308.06547"/></url>
<url><loc>https://scifaro.com/en/abs/speechx-neural-codec-language-model-as-a-versatile-speech-transformer-2308.06873</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechx-neural-codec-language-model-as-a-versatile-speech-transformer-2308.06873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechx-neural-codec-language-model-as-a-versatile-speech-transformer-2308.06873"/></url>
<url><loc>https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-music-demixing-track-2308.06979</loc><lastmod>2024-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-music-demixing-track-2308.06979"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-music-demixing-track-2308.06979"/></url>
<url><loc>https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-cinematic-demixing-track-2308.06981</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-cinematic-demixing-track-2308.06981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-sound-demixing-challenge-2023-unicode-x2013-cinematic-demixing-track-2308.06981"/></url>
<url><loc>https://scifaro.com/en/abs/voxblink-a-large-scale-speaker-verification-dataset-on-camera-2308.07056</loc><lastmod>2023-12-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxblink-a-large-scale-speaker-verification-dataset-on-camera-2308.07056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxblink-a-large-scale-speaker-verification-dataset-on-camera-2308.07056"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-emotion-recognition-with-speech-recognition-and-speaker-diarisation-for-conversations-2308.07145</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-emotion-recognition-with-speech-recognition-and-speaker-diarisation-for-conversations-2308.07145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-emotion-recognition-with-speech-recognition-and-speaker-diarisation-for-conversations-2308.07145"/></url>
<url><loc>https://scifaro.com/en/abs/compositional-nonlinear-audio-signal-processing-with-volterra-series-2308.07229</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compositional-nonlinear-audio-signal-processing-with-volterra-series-2308.07229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compositional-nonlinear-audio-signal-processing-with-volterra-series-2308.07229"/></url>
<url><loc>https://scifaro.com/en/abs/localization-of-doa-trajectories-beyond-the-grid-2308.07265</loc><lastmod>2023-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localization-of-doa-trajectories-beyond-the-grid-2308.07265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localization-of-doa-trajectories-beyond-the-grid-2308.07265"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-msxf-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.07595</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-msxf-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.07595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-msxf-diarization-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.07595"/></url>
<url><loc>https://scifaro.com/en/abs/preliminary-investigation-of-the-short-term-in-situ-performance-of-an-automatic-masker-selection-system-2308.07767</loc><lastmod>2023-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preliminary-investigation-of-the-short-term-in-situ-performance-of-an-automatic-masker-selection-system-2308.07767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preliminary-investigation-of-the-short-term-in-situ-performance-of-an-automatic-masker-selection-system-2308.07767"/></url>
<url><loc>https://scifaro.com/en/abs/gist-aiter-speaker-diarization-system-for-voxceleb-speaker-recognition-challenge-voxsrc-2023-2308.07788</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gist-aiter-speaker-diarization-system-for-voxceleb-speaker-recognition-challenge-voxsrc-2023-2308.07788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gist-aiter-speaker-diarization-system-for-voxceleb-speaker-recognition-challenge-voxsrc-2023-2308.07788"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-with-multilingual-neural-representations-2308.08027</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-with-multilingual-neural-representations-2308.08027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-open-vocabulary-keyword-search-with-multilingual-neural-representations-2308.08027"/></url>
<url><loc>https://scifaro.com/en/abs/the-id-r-d-voxceleb-speaker-recognition-challenge-2023-system-description-2308.08294</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-id-r-d-voxceleb-speaker-recognition-challenge-2023-system-description-2308.08294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-id-r-d-voxceleb-speaker-recognition-challenge-2023-system-description-2308.08294"/></url>
<url><loc>https://scifaro.com/en/abs/classifying-dementia-in-the-presence-of-depression-a-cross-corpus-study-2308.08306</loc><lastmod>2023-08-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classifying-dementia-in-the-presence-of-depression-a-cross-corpus-study-2308.08306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classifying-dementia-in-the-presence-of-depression-a-cross-corpus-study-2308.08306"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-msxf-speaker-verification-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.08766</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-msxf-speaker-verification-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.08766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-msxf-speaker-verification-system-for-the-voxceleb-speaker-recognition-challenge-2023-2308.08766"/></url>
<url><loc>https://scifaro.com/en/abs/graph-neural-network-backend-for-speaker-recognition-2308.08767</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-neural-network-backend-for-speaker-recognition-2308.08767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-neural-network-backend-for-speaker-recognition-2308.08767"/></url>
<url><loc>https://scifaro.com/en/abs/meta-seld-meta-learning-for-fast-adaptation-to-the-new-environment-in-sound-event-localization-and-detection-2308.08847</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-seld-meta-learning-for-fast-adaptation-to-the-new-environment-in-sound-event-localization-and-detection-2308.08847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-seld-meta-learning-for-fast-adaptation-to-the-new-environment-in-sound-event-localization-and-detection-2308.08847"/></url>
<url><loc>https://scifaro.com/en/abs/explicit-estimation-of-magnitude-and-phase-spectra-in-parallel-for-high-quality-speech-enhancement-2308.08926</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explicit-estimation-of-magnitude-and-phase-spectra-in-parallel-for-high-quality-speech-enhancement-2308.08926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explicit-estimation-of-magnitude-and-phase-spectra-in-parallel-for-high-quality-speech-enhancement-2308.08926"/></url>
<url><loc>https://scifaro.com/en/abs/home-monitoring-for-frailty-detection-through-sound-and-speaker-diarization-analysis-2308.08985</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/home-monitoring-for-frailty-detection-through-sound-and-speaker-diarization-analysis-2308.08985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/home-monitoring-for-frailty-detection-through-sound-and-speaker-diarization-analysis-2308.08985"/></url>
<url><loc>https://scifaro.com/en/abs/severity-classification-of-parkinson-s-disease-from-speech-using-single-frequency-filtering-based-features-2308.09042</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/severity-classification-of-parkinson-s-disease-from-speech-using-single-frequency-filtering-based-features-2308.09042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/severity-classification-of-parkinson-s-disease-from-speech-using-single-frequency-filtering-based-features-2308.09042"/></url>
<url><loc>https://scifaro.com/en/abs/refining-a-deep-learning-based-formant-tracker-using-linear-prediction-methods-2308.09051</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refining-a-deep-learning-based-formant-tracker-using-linear-prediction-methods-2308.09051"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refining-a-deep-learning-based-formant-tracker-using-linear-prediction-methods-2308.09051"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-pseudo-label-learning-for-non-intrusive-speech-quality-assessment-model-2308.09262</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-pseudo-label-learning-for-non-intrusive-speech-quality-assessment-model-2308.09262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-pseudo-label-learning-for-non-intrusive-speech-quality-assessment-model-2308.09262"/></url>
<url><loc>https://scifaro.com/en/abs/generative-machine-listener-2308.09493</loc><lastmod>2023-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-machine-listener-2308.09493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-machine-listener-2308.09493"/></url>
<url><loc>https://scifaro.com/en/abs/effects-of-convolutional-autoencoder-bottleneck-width-on-stargan-based-singing-technique-conversion-2308.10021</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effects-of-convolutional-autoencoder-bottleneck-width-on-stargan-based-singing-technique-conversion-2308.10021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effects-of-convolutional-autoencoder-bottleneck-width-on-stargan-based-singing-technique-conversion-2308.10021"/></url>
<url><loc>https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-manipulation-region-location-task-of-add-2023-2308.10281</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-manipulation-region-location-task-of-add-2023-2308.10281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-dku-dukeece-system-for-the-manipulation-region-location-task-of-add-2023-2308.10281"/></url>
<url><loc>https://scifaro.com/en/abs/local-periodicity-based-beat-tracking-for-expressive-classical-piano-music-2308.10355</loc><lastmod>2023-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/local-periodicity-based-beat-tracking-for-expressive-classical-piano-music-2308.10355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/local-periodicity-based-beat-tracking-for-expressive-classical-piano-music-2308.10355"/></url>
<url><loc>https://scifaro.com/en/abs/multi-gradspeech-towards-diffusion-based-multi-speaker-text-to-speech-using-consistent-diffusion-models-2308.10428</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-gradspeech-towards-diffusion-based-multi-speaker-text-to-speech-using-consistent-diffusion-models-2308.10428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-gradspeech-towards-diffusion-based-multi-speaker-text-to-speech-using-consistent-diffusion-models-2308.10428"/></url>
<url><loc>https://scifaro.com/en/abs/implicit-self-supervised-language-representation-for-spoken-language-diarization-2308.10470</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/implicit-self-supervised-language-representation-for-spoken-language-diarization-2308.10470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/implicit-self-supervised-language-representation-for-spoken-language-diarization-2308.10470"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-dual-path-compression-for-joint-echo-cancellation-and-noise-suppression-2308.11053</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-dual-path-compression-for-joint-echo-cancellation-and-noise-suppression-2308.11053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-dual-path-compression-for-joint-echo-cancellation-and-noise-suppression-2308.11053"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-the-speech-resynthesis-capabilities-of-the-voiceprivacy-challenge-baseline-b1-2308.11337</loc><lastmod>2023-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-the-speech-resynthesis-capabilities-of-the-voiceprivacy-challenge-baseline-b1-2308.11337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-the-speech-resynthesis-capabilities-of-the-voiceprivacy-challenge-baseline-b1-2308.11337"/></url>
<url><loc>https://scifaro.com/en/abs/example-based-framework-for-perceptually-guided-audio-texture-generation-2308.11859</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/example-based-framework-for-perceptually-guided-audio-texture-generation-2308.11859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/example-based-framework-for-perceptually-guided-audio-texture-generation-2308.11859"/></url>
<url><loc>https://scifaro.com/en/abs/kinspeak-improving-speech-recognition-for-kinyarwanda-via-semi-supervised-learning-methods-2308.11863</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kinspeak-improving-speech-recognition-for-kinyarwanda-via-semi-supervised-learning-methods-2308.11863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kinspeak-improving-speech-recognition-for-kinyarwanda-via-semi-supervised-learning-methods-2308.11863"/></url>
<url><loc>https://scifaro.com/en/abs/audio-difference-captioning-utilizing-similarity-discrepancy-disentanglement-2308.11923</loc><lastmod>2023-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-difference-captioning-utilizing-similarity-discrepancy-disentanglement-2308.11923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-difference-captioning-utilizing-similarity-discrepancy-disentanglement-2308.11923"/></url>
<url><loc>https://scifaro.com/en/abs/joint-prediction-of-audio-event-and-annoyance-rating-in-an-urban-soundscape-by-hierarchical-graph-representation-learning-2308.11980</loc><lastmod>2023-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-prediction-of-audio-event-and-annoyance-rating-in-an-urban-soundscape-by-hierarchical-graph-representation-learning-2308.11980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-prediction-of-audio-event-and-annoyance-rating-in-an-urban-soundscape-by-hierarchical-graph-representation-learning-2308.11980"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-xls-r-for-speech-quality-assessment-2308.12077</loc><lastmod>2023-08-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-xls-r-for-speech-quality-assessment-2308.12077"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-xls-r-for-speech-quality-assessment-2308.12077"/></url>
<url><loc>https://scifaro.com/en/abs/unisound-system-for-voxceleb-speaker-recognition-challenge-2023-2308.12526</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unisound-system-for-voxceleb-speaker-recognition-challenge-2023-2308.12526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unisound-system-for-voxceleb-speaker-recognition-challenge-2023-2308.12526"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-noise-shaping-for-audio-coding-using-perfectly-overlapped-window-2308.12566</loc><lastmod>2023-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-noise-shaping-for-audio-coding-using-perfectly-overlapped-window-2308.12566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-noise-shaping-for-audio-coding-using-perfectly-overlapped-window-2308.12566"/></url>
<url><loc>https://scifaro.com/en/abs/decoupled-structure-for-improved-adaptability-of-end-to-end-models-2308.13345</loc><lastmod>2023-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoupled-structure-for-improved-adaptability-of-end-to-end-models-2308.13345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoupled-structure-for-improved-adaptability-of-end-to-end-models-2308.13345"/></url>
<url><loc>https://scifaro.com/en/abs/fairness-and-privacy-in-voice-biometrics-a-study-of-gender-influences-using-wav2vec-2-0-2308.14049</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fairness-and-privacy-in-voice-biometrics-a-study-of-gender-influences-using-wav2vec-2-0-2308.14049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fairness-and-privacy-in-voice-biometrics-a-study-of-gender-influences-using-wav2vec-2-0-2308.14049"/></url>
<url><loc>https://scifaro.com/en/abs/textrolspeech-a-text-style-control-speech-corpus-with-codec-language-text-to-speech-models-2308.14430</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/textrolspeech-a-text-style-control-speech-corpus-with-codec-language-text-to-speech-models-2308.14430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/textrolspeech-a-text-style-control-speech-corpus-with-codec-language-text-to-speech-models-2308.14430"/></url>
<url><loc>https://scifaro.com/en/abs/speech-self-supervised-representations-benchmarking-a-case-for-larger-probing-heads-2308.14456</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-self-supervised-representations-benchmarking-a-case-for-larger-probing-heads-2308.14456"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-self-supervised-representations-benchmarking-a-case-for-larger-probing-heads-2308.14456"/></url>
<url><loc>https://scifaro.com/en/abs/rep2wav-noise-robust-text-to-speech-using-self-supervised-representations-2308.14553</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rep2wav-noise-robust-text-to-speech-using-self-supervised-representations-2308.14553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rep2wav-noise-robust-text-to-speech-using-self-supervised-representations-2308.14553"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-3d-room-geometry-inference-with-a-linear-loudspeaker-array-and-a-single-microphone-2308.14611</loc><lastmod>2023-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-3d-room-geometry-inference-with-a-linear-loudspeaker-array-and-a-single-microphone-2308.14611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-3d-room-geometry-inference-with-a-linear-loudspeaker-array-and-a-single-microphone-2308.14611"/></url>
<url><loc>https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-7-dasr-challenge-2308.14638</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-7-dasr-challenge-2308.14638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-7-dasr-challenge-2308.14638"/></url>
<url><loc>https://scifaro.com/en/abs/voicebank-2023-a-multi-speaker-mandarin-speech-corpus-for-constructing-personalized-tts-systems-for-the-speech-impaired-2308.14763</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicebank-2023-a-multi-speaker-mandarin-speech-corpus-for-constructing-personalized-tts-systems-for-the-speech-impaired-2308.14763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicebank-2023-a-multi-speaker-mandarin-speech-corpus-for-constructing-personalized-tts-systems-for-the-speech-impaired-2308.14763"/></url>
<url><loc>https://scifaro.com/en/abs/eeg-derived-voice-signature-for-attended-speaker-detection-2308.14774</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg-derived-voice-signature-for-attended-speaker-detection-2308.14774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg-derived-voice-signature-for-attended-speaker-detection-2308.14774"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-active-learning-optimizing-labeling-cost-effectiveness-for-automatic-speech-recognition-2308.14814</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-active-learning-optimizing-labeling-cost-effectiveness-for-automatic-speech-recognition-2308.14814"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-active-learning-optimizing-labeling-cost-effectiveness-for-automatic-speech-recognition-2308.14814"/></url>
<url><loc>https://scifaro.com/en/abs/preference-based-training-framework-for-automatic-speech-quality-assessment-using-deep-neural-network-2308.15203</loc><lastmod>2023-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preference-based-training-framework-for-automatic-speech-quality-assessment-using-deep-neural-network-2308.15203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preference-based-training-framework-for-automatic-speech-quality-assessment-using-deep-neural-network-2308.15203"/></url>
<url><loc>https://scifaro.com/en/abs/let-there-be-sound-reconstructing-high-quality-speech-from-silent-videos-2308.15256</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/let-there-be-sound-reconstructing-high-quality-speech-from-silent-videos-2308.15256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/let-there-be-sound-reconstructing-high-quality-speech-from-silent-videos-2308.15256"/></url>
<url><loc>https://scifaro.com/en/abs/deftan-ii-efficient-multichannel-speech-enhancement-with-subgroup-processing-2308.15777</loc><lastmod>2025-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deftan-ii-efficient-multichannel-speech-enhancement-with-subgroup-processing-2308.15777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deftan-ii-efficient-multichannel-speech-enhancement-with-subgroup-processing-2308.15777"/></url>
<url><loc>https://scifaro.com/en/abs/the-deepzen-speech-synthesis-system-for-blizzard-challenge-2023-2308.15945</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-deepzen-speech-synthesis-system-for-blizzard-challenge-2023-2308.15945"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-deepzen-speech-synthesis-system-for-blizzard-challenge-2023-2308.15945"/></url>
<url><loc>https://scifaro.com/en/abs/multi-transfer-learning-techniques-for-detecting-auditory-brainstem-response-2308.16203</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-transfer-learning-techniques-for-detecting-auditory-brainstem-response-2308.16203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-transfer-learning-techniques-for-detecting-auditory-brainstem-response-2308.16203"/></url>
<url><loc>https://scifaro.com/en/abs/supervised-contrastive-learning-with-nearest-neighbor-search-for-speech-emotion-recognition-2308.16485</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supervised-contrastive-learning-with-nearest-neighbor-search-for-speech-emotion-recognition-2308.16485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supervised-contrastive-learning-with-nearest-neighbor-search-for-speech-emotion-recognition-2308.16485"/></url>
<url><loc>https://scifaro.com/en/abs/ramp-retrieval-augmented-mos-prediction-via-confidence-based-dynamic-weighting-2308.16488</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ramp-retrieval-augmented-mos-prediction-via-confidence-based-dynamic-weighting-2308.16488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ramp-retrieval-augmented-mos-prediction-via-confidence-based-dynamic-weighting-2308.16488"/></url>
<url><loc>https://scifaro.com/en/abs/phonmatchnet-phoneme-guided-zero-shot-keyword-spotting-for-user-defined-keywords-2308.16511</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonmatchnet-phoneme-guided-zero-shot-keyword-spotting-for-user-defined-keywords-2308.16511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonmatchnet-phoneme-guided-zero-shot-keyword-spotting-for-user-defined-keywords-2308.16511"/></url>
<url><loc>https://scifaro.com/en/abs/time-varying-quasi-closed-phase-analysis-for-accurate-formant-tracking-in-speech-signals-2308.16540</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-varying-quasi-closed-phase-analysis-for-accurate-formant-tracking-in-speech-signals-2308.16540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-varying-quasi-closed-phase-analysis-for-accurate-formant-tracking-in-speech-signals-2308.16540"/></url>
<url><loc>https://scifaro.com/en/abs/rezero-region-customizable-sound-extraction-2308.16892</loc><lastmod>2023-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rezero-region-customizable-sound-extraction-2308.16892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rezero-region-customizable-sound-extraction-2308.16892"/></url>
<url><loc>https://scifaro.com/en/abs/repcodec-a-speech-representation-codec-for-speech-tokenization-2309.00169</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repcodec-a-speech-representation-codec-for-speech-tokenization-2309.00169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repcodec-a-speech-representation-codec-for-speech-tokenization-2309.00169"/></url>
<url><loc>https://scifaro.com/en/abs/the-fruitshell-french-synthesis-system-at-the-blizzard-2023-challenge-2309.00223</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-fruitshell-french-synthesis-system-at-the-blizzard-2023-challenge-2309.00223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-fruitshell-french-synthesis-system-at-the-blizzard-2023-challenge-2309.00223"/></url>
<url><loc>https://scifaro.com/en/abs/remixing-based-unsupervised-source-separation-from-scratch-2309.00376</loc><lastmod>2023-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remixing-based-unsupervised-source-separation-from-scratch-2309.00376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remixing-based-unsupervised-source-separation-from-scratch-2309.00376"/></url>
<url><loc>https://scifaro.com/en/abs/learning-speech-representation-from-contrastive-token-acoustic-pretraining-2309.00424</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-speech-representation-from-contrastive-token-acoustic-pretraining-2309.00424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-speech-representation-from-contrastive-token-acoustic-pretraining-2309.00424"/></url>
<url><loc>https://scifaro.com/en/abs/improving-small-footprint-few-shot-keyword-spotting-with-supervision-on-auxiliary-data-2309.00647</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-small-footprint-few-shot-keyword-spotting-with-supervision-on-auxiliary-data-2309.00647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-small-footprint-few-shot-keyword-spotting-with-supervision-on-auxiliary-data-2309.00647"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-for-dysarthric-speech-are-pre-trained-self-supervised-representations-favorable-2309.01108</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-for-dysarthric-speech-are-pre-trained-self-supervised-representations-favorable-2309.01108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-to-articulatory-inversion-for-dysarthric-speech-are-pre-trained-self-supervised-representations-favorable-2309.01108"/></url>
<url><loc>https://scifaro.com/en/abs/msm-vc-high-fidelity-source-style-transfer-for-non-parallel-voice-conversion-by-multi-scale-style-modeling-2309.01142</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msm-vc-high-fidelity-source-style-transfer-for-non-parallel-voice-conversion-by-multi-scale-style-modeling-2309.01142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msm-vc-high-fidelity-source-style-transfer-for-non-parallel-voice-conversion-by-multi-scale-style-modeling-2309.01142"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-speech-emotion-recognition-with-signal-to-noise-ratio-adapting-speech-enhancement-2309.01164</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-speech-emotion-recognition-with-signal-to-noise-ratio-adapting-speech-enhancement-2309.01164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-speech-emotion-recognition-with-signal-to-noise-ratio-adapting-speech-enhancement-2309.01164"/></url>
<url><loc>https://scifaro.com/en/abs/rgi-net-3d-room-geometry-inference-from-room-impulse-responses-with-hidden-first-order-reflections-2309.01513</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rgi-net-3d-room-geometry-inference-from-room-impulse-responses-with-hidden-first-order-reflections-2309.01513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rgi-net-3d-room-geometry-inference-from-room-impulse-responses-with-hidden-first-order-reflections-2309.01513"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-with-deep-complex-u-networks-and-probabilistic-latent-space-models-2309.01535</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-with-deep-complex-u-networks-and-probabilistic-latent-space-models-2309.01535"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-with-deep-complex-u-networks-and-probabilistic-latent-space-models-2309.01535"/></url>
<url><loc>https://scifaro.com/en/abs/pesto-pitch-estimation-with-self-supervised-transposition-equivariant-objective-2309.02265</loc><lastmod>2025-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pesto-pitch-estimation-with-self-supervised-transposition-equivariant-objective-2309.02265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pesto-pitch-estimation-with-self-supervised-transposition-equivariant-objective-2309.02265"/></url>
<url><loc>https://scifaro.com/en/abs/prompttts-2-describing-and-generating-voices-with-text-prompt-2309.02285</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompttts-2-describing-and-generating-voices-with-text-prompt-2309.02285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompttts-2-describing-and-generating-voices-with-text-prompt-2309.02285"/></url>
<url><loc>https://scifaro.com/en/abs/in-ear-voice-towards-milli-watt-audio-enhancement-with-bone-conduction-microphones-for-in-ear-sensing-platforms-2309.02393</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-ear-voice-towards-milli-watt-audio-enhancement-with-bone-conduction-microphones-for-in-ear-sensing-platforms-2309.02393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-ear-voice-towards-milli-watt-audio-enhancement-with-bone-conduction-microphones-for-in-ear-sensing-platforms-2309.02393"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-adaptation-with-pre-trained-speech-encoders-for-continuous-emotion-recognition-2309.02418</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-adaptation-with-pre-trained-speech-encoders-for-continuous-emotion-recognition-2309.02418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-adaptation-with-pre-trained-speech-encoders-for-continuous-emotion-recognition-2309.02418"/></url>
<url><loc>https://scifaro.com/en/abs/employing-real-training-data-for-deep-noise-suppression-2309.02432</loc><lastmod>2023-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/employing-real-training-data-for-deep-noise-suppression-2309.02432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/employing-real-training-data-for-deep-noise-suppression-2309.02432"/></url>
<url><loc>https://scifaro.com/en/abs/minimal-effective-theory-for-phonotactic-memory-capturing-local-correlations-due-to-errors-in-speech-2309.02466</loc><lastmod>2023-09-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimal-effective-theory-for-phonotactic-memory-capturing-local-correlations-due-to-errors-in-speech-2309.02466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimal-effective-theory-for-phonotactic-memory-capturing-local-correlations-due-to-errors-in-speech-2309.02466"/></url>
<url><loc>https://scifaro.com/en/abs/a-generalized-bandsplit-neural-network-for-cinematic-audio-source-separation-2309.02539</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-generalized-bandsplit-neural-network-for-cinematic-audio-source-separation-2309.02539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-generalized-bandsplit-neural-network-for-cinematic-audio-source-separation-2309.02539"/></url>
<url><loc>https://scifaro.com/en/abs/symbolic-music-representations-for-classification-tasks-a-systematic-evaluation-2309.02567</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/symbolic-music-representations-for-classification-tasks-a-systematic-evaluation-2309.02567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/symbolic-music-representations-for-classification-tasks-a-systematic-evaluation-2309.02567"/></url>
<url><loc>https://scifaro.com/en/abs/bwsnet-automatic-perceptual-assessment-of-audio-signals-2309.02592</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bwsnet-automatic-perceptual-assessment-of-audio-signals-2309.02592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bwsnet-automatic-perceptual-assessment-of-audio-signals-2309.02592"/></url>
<url><loc>https://scifaro.com/en/abs/stylebook-content-dependent-speaking-style-modeling-for-any-to-any-voice-conversion-using-only-speech-data-2309.02730</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylebook-content-dependent-speaking-style-modeling-for-any-to-any-voice-conversion-using-only-speech-data-2309.02730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylebook-content-dependent-speaking-style-modeling-for-any-to-any-voice-conversion-using-only-speech-data-2309.02730"/></url>
<url><loc>https://scifaro.com/en/abs/mulantts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2023-2309.02743</loc><lastmod>2023-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mulantts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2023-2309.02743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mulantts-the-microsoft-speech-synthesis-system-for-blizzard-challenge-2023-2309.02743"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-asr-pretrained-conformers-for-speaker-verification-through-transfer-learning-and-knowledge-distillation-2309.03019</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-asr-pretrained-conformers-for-speaker-verification-through-transfer-learning-and-knowledge-distillation-2309.03019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-asr-pretrained-conformers-for-speaker-verification-through-transfer-learning-and-knowledge-distillation-2309.03019"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-auralization-for-performers-on-virtual-stages-2309.03149</loc><lastmod>2026-03-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-auralization-for-performers-on-virtual-stages-2309.03149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-auralization-for-performers-on-virtual-stages-2309.03149"/></url>
<url><loc>https://scifaro.com/en/abs/matcha-tts-a-fast-tts-architecture-with-conditional-flow-matching-2309.03199</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matcha-tts-a-fast-tts-architecture-with-conditional-flow-matching-2309.03199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matcha-tts-a-fast-tts-architecture-with-conditional-flow-matching-2309.03199"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-geometrical-acoustic-simulations-of-spatial-room-impulse-responses-for-improved-sound-event-detection-and-localization-2309.03337</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-geometrical-acoustic-simulations-of-spatial-room-impulse-responses-for-improved-sound-event-detection-and-localization-2309.03337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-geometrical-acoustic-simulations-of-spatial-room-impulse-responses-for-improved-sound-event-detection-and-localization-2309.03337"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-room-transfer-functions-between-transducers-mounted-on-audio-devices-using-a-modified-image-source-method-2309.03486</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-room-transfer-functions-between-transducers-mounted-on-audio-devices-using-a-modified-image-source-method-2309.03486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-room-transfer-functions-between-transducers-mounted-on-audio-devices-using-a-modified-image-source-method-2309.03486"/></url>
<url><loc>https://scifaro.com/en/abs/causal-signal-based-dccrn-with-overlapped-frame-prediction-for-online-speech-enhancement-2309.03684</loc><lastmod>2023-09-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-signal-based-dccrn-with-overlapped-frame-prediction-for-online-speech-enhancement-2309.03684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-signal-based-dccrn-with-overlapped-frame-prediction-for-online-speech-enhancement-2309.03684"/></url>
<url><loc>https://scifaro.com/en/abs/asymmetric-clean-segments-guided-self-supervised-learning-for-robust-speaker-verification-2309.04265</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asymmetric-clean-segments-guided-self-supervised-learning-for-robust-speaker-verification-2309.04265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asymmetric-clean-segments-guided-self-supervised-learning-for-robust-speaker-verification-2309.04265"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-with-acoustic-language-model-pretraining-2309.04516</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-with-acoustic-language-model-pretraining-2309.04516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-and-disfluency-removal-with-acoustic-language-model-pretraining-2309.04516"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-pretrained-image-text-models-for-improving-audio-visual-learning-2309.04628</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-pretrained-image-text-models-for-improving-audio-visual-learning-2309.04628"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-pretrained-image-text-models-for-improving-audio-visual-learning-2309.04628"/></url>
<url><loc>https://scifaro.com/en/abs/voiceflow-efficient-text-to-speech-with-rectified-flow-matching-2309.05027</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceflow-efficient-text-to-speech-with-rectified-flow-matching-2309.05027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceflow-efficient-text-to-speech-with-rectified-flow-matching-2309.05027"/></url>
<url><loc>https://scifaro.com/en/abs/plumbernet-fixing-interference-leakage-after-gev-beamforming-2309.05057</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plumbernet-fixing-interference-leakage-after-gev-beamforming-2309.05057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plumbernet-fixing-interference-leakage-after-gev-beamforming-2309.05057"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speaker-diarization-with-large-language-models-a-contextual-beam-search-approach-2309.05248</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speaker-diarization-with-large-language-models-a-contextual-beam-search-approach-2309.05248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speaker-diarization-with-large-language-models-a-contextual-beam-search-approach-2309.05248"/></url>
<url><loc>https://scifaro.com/en/abs/towards-generalisable-and-calibrated-synthetic-speech-detection-with-self-supervised-representations-2309.05384</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-generalisable-and-calibrated-synthetic-speech-detection-with-self-supervised-representations-2309.05384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-generalisable-and-calibrated-synthetic-speech-detection-with-self-supervised-representations-2309.05384"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-automatic-prosody-annotation-with-contrastive-pretraining-of-sswp-2309.05423</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-automatic-prosody-annotation-with-contrastive-pretraining-of-sswp-2309.05423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-automatic-prosody-annotation-with-contrastive-pretraining-of-sswp-2309.05423"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-co-speech-gesture-generation-using-joint-text-and-audio-representation-2309.05455</loc><lastmod>2023-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-co-speech-gesture-generation-using-joint-text-and-audio-representation-2309.05455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-co-speech-gesture-generation-using-joint-text-and-audio-representation-2309.05455"/></url>
<url><loc>https://scifaro.com/en/abs/smartwatch-derived-acoustic-markers-for-deficits-in-cognitively-relevant-everyday-functioning-2309.05777</loc><lastmod>2023-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smartwatch-derived-acoustic-markers-for-deficits-in-cognitively-relevant-everyday-functioning-2309.05777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smartwatch-derived-acoustic-markers-for-deficits-in-cognitively-relevant-everyday-functioning-2309.05777"/></url>
<url><loc>https://scifaro.com/en/abs/can-large-scale-vocoded-spoofed-data-improve-speech-spoofing-countermeasure-with-a-self-supervised-front-end-2309.06014</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-large-scale-vocoded-spoofed-data-improve-speech-spoofing-countermeasure-with-a-self-supervised-front-end-2309.06014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-large-scale-vocoded-spoofed-data-improve-speech-spoofing-countermeasure-with-a-self-supervised-front-end-2309.06014"/></url>
<url><loc>https://scifaro.com/en/abs/iphonmatchnet-zero-shot-user-defined-keyword-spotting-using-implicit-acoustic-echo-cancellation-2309.06096</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iphonmatchnet-zero-shot-user-defined-keyword-spotting-using-implicit-acoustic-echo-cancellation-2309.06096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iphonmatchnet-zero-shot-user-defined-keyword-spotting-using-implicit-acoustic-echo-cancellation-2309.06096"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-the-generalization-gap-of-learning-based-speech-enhancement-systems-in-noisy-and-reverberant-environments-2309.06183</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-the-generalization-gap-of-learning-based-speech-enhancement-systems-in-noisy-and-reverberant-environments-2309.06183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-the-generalization-gap-of-learning-based-speech-enhancement-systems-in-noisy-and-reverberant-environments-2309.06183"/></url>
<url><loc>https://scifaro.com/en/abs/asped-an-audio-dataset-for-detecting-pedestrians-2309.06531</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asped-an-audio-dataset-for-detecting-pedestrians-2309.06531"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asped-an-audio-dataset-for-detecting-pedestrians-2309.06531"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-the-blind-spots-in-spoken-language-processing-2309.06572</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-the-blind-spots-in-spoken-language-processing-2309.06572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-the-blind-spots-in-spoken-language-processing-2309.06572"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-decomposition-based-on-two-stage-neural-networks-2309.06661</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-decomposition-based-on-two-stage-neural-networks-2309.06661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-decomposition-based-on-two-stage-neural-networks-2309.06661"/></url>
<url><loc>https://scifaro.com/en/abs/vrdmg-vocal-restoration-via-diffusion-posterior-sampling-with-multiple-guidance-2309.06934</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vrdmg-vocal-restoration-via-diffusion-posterior-sampling-with-multiple-guidance-2309.06934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vrdmg-vocal-restoration-via-diffusion-posterior-sampling-with-multiple-guidance-2309.06934"/></url>
<url><loc>https://scifaro.com/en/abs/reorganization-of-the-auditory-perceptual-space-across-the-human-vocal-range-2309.06946</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reorganization-of-the-auditory-perceptual-space-across-the-human-vocal-range-2309.06946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reorganization-of-the-auditory-perceptual-space-across-the-human-vocal-range-2309.06946"/></url>
<url><loc>https://scifaro.com/en/abs/a-flexible-online-framework-for-projection-based-stft-phase-retrieval-2309.07043</loc><lastmod>2023-09-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-flexible-online-framework-for-projection-based-stft-phase-retrieval-2309.07043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-flexible-online-framework-for-projection-based-stft-phase-retrieval-2309.07043"/></url>
<url><loc>https://scifaro.com/en/abs/can-whisper-perform-speech-based-in-context-learning-2309.07081</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-whisper-perform-speech-based-in-context-learning-2309.07081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-whisper-perform-speech-based-in-context-learning-2309.07081"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-asr-for-resource-constrained-robots-hmm-deep-learning-fusion-2309.07164</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-asr-for-resource-constrained-robots-hmm-deep-learning-fusion-2309.07164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-asr-for-resource-constrained-robots-hmm-deep-learning-fusion-2309.07164"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-child-vocalization-classification-with-phonetically-tuned-embeddings-for-assisting-autism-diagnosis-2309.07287</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-child-vocalization-classification-with-phonetically-tuned-embeddings-for-assisting-autism-diagnosis-2309.07287"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-child-vocalization-classification-with-phonetically-tuned-embeddings-for-assisting-autism-diagnosis-2309.07287"/></url>
<url><loc>https://scifaro.com/en/abs/hybrid-attention-based-encoder-decoder-model-for-efficient-language-model-adaptation-2309.07369</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hybrid-attention-based-encoder-decoder-model-for-efficient-language-model-adaptation-2309.07369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hybrid-attention-based-encoder-decoder-model-for-efficient-language-model-adaptation-2309.07369"/></url>
<url><loc>https://scifaro.com/en/abs/training-audio-captioning-models-without-audio-2309.07372</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-audio-captioning-models-without-audio-2309.07372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-audio-captioning-models-without-audio-2309.07372"/></url>
<url><loc>https://scifaro.com/en/abs/towards-universal-speech-discrete-tokens-a-case-study-for-asr-and-tts-2309.07377</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-universal-speech-discrete-tokens-a-case-study-for-asr-and-tts-2309.07377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-universal-speech-discrete-tokens-a-case-study-for-asr-and-tts-2309.07377"/></url>
<url><loc>https://scifaro.com/en/abs/multi-dimensional-speech-quality-assessment-in-crowdsourcing-2309.07385</loc><lastmod>2023-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-dimensional-speech-quality-assessment-in-crowdsourcing-2309.07385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-dimensional-speech-quality-assessment-in-crowdsourcing-2309.07385"/></url>
<url><loc>https://scifaro.com/en/abs/promptasr-for-contextualized-asr-with-controllable-style-2309.07414</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/promptasr-for-contextualized-asr-with-controllable-style-2309.07414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/promptasr-for-contextualized-asr-with-controllable-style-2309.07414"/></url>
<url><loc>https://scifaro.com/en/abs/codec-data-augmentation-for-time-domain-heart-sound-classification-2309.07466</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codec-data-augmentation-for-time-domain-heart-sound-classification-2309.07466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codec-data-augmentation-for-time-domain-heart-sound-classification-2309.07466"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-metadata-information-constrained-self-supervised-learning-for-anomalous-sound-detection-under-domain-shift-2309.07498</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-metadata-information-constrained-self-supervised-learning-for-anomalous-sound-detection-under-domain-shift-2309.07498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-metadata-information-constrained-self-supervised-learning-for-anomalous-sound-detection-under-domain-shift-2309.07498"/></url>
<url><loc>https://scifaro.com/en/abs/emo-stargan-a-semi-supervised-any-to-many-non-parallel-emotion-preserving-voice-conversion-2309.07586</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-stargan-a-semi-supervised-any-to-many-non-parallel-emotion-preserving-voice-conversion-2309.07586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-stargan-a-semi-supervised-any-to-many-non-parallel-emotion-preserving-voice-conversion-2309.07586"/></url>
<url><loc>https://scifaro.com/en/abs/stargan-vc-towards-emotion-preserving-voice-conversion-using-deep-embeddings-2309.07592</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stargan-vc-towards-emotion-preserving-voice-conversion-using-deep-embeddings-2309.07592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stargan-vc-towards-emotion-preserving-voice-conversion-using-deep-embeddings-2309.07592"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-class-based-language-model-for-named-entity-recognition-in-factorized-neural-transducer-2309.07648</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-class-based-language-model-for-named-entity-recognition-in-factorized-neural-transducer-2309.07648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-class-based-language-model-for-named-entity-recognition-in-factorized-neural-transducer-2309.07648"/></url>
<url><loc>https://scifaro.com/en/abs/complexity-scaling-for-speech-denoising-2309.07757</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complexity-scaling-for-speech-denoising-2309.07757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complexity-scaling-for-speech-denoising-2309.07757"/></url>
<url><loc>https://scifaro.com/en/abs/snakegan-a-universal-vocoder-leveraging-ddsp-prior-knowledge-and-periodic-inductive-bias-2309.07803</loc><lastmod>2023-09-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/snakegan-a-universal-vocoder-leveraging-ddsp-prior-knowledge-and-periodic-inductive-bias-2309.07803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/snakegan-a-universal-vocoder-leveraging-ddsp-prior-knowledge-and-periodic-inductive-bias-2309.07803"/></url>
<url><loc>https://scifaro.com/en/abs/emoconv-diff-diffusion-based-speech-emotion-conversion-for-non-parallel-and-in-the-wild-data-2309.07828</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emoconv-diff-diffusion-based-speech-emotion-conversion-for-non-parallel-and-in-the-wild-data-2309.07828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emoconv-diff-diffusion-based-speech-emotion-conversion-for-non-parallel-and-in-the-wild-data-2309.07828"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-audio-visual-information-fusion-with-multi-label-joint-decoding-for-mer-2023-2309.07925</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-audio-visual-information-fusion-with-multi-label-joint-decoding-for-mer-2023-2309.07925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-audio-visual-information-fusion-with-multi-label-joint-decoding-for-mer-2023-2309.07925"/></url>
<url><loc>https://scifaro.com/en/abs/kid-whisper-towards-bridging-the-performance-gap-in-automatic-speech-recognition-for-children-vs-adults-2309.07927</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kid-whisper-towards-bridging-the-performance-gap-in-automatic-speech-recognition-for-children-vs-adults-2309.07927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kid-whisper-towards-bridging-the-performance-gap-in-automatic-speech-recognition-for-children-vs-adults-2309.07927"/></url>
<url><loc>https://scifaro.com/en/abs/voxtlm-unified-decoder-only-models-for-consolidating-speech-recognition-synthesis-and-speech-text-continuation-tasks-2309.07937</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxtlm-unified-decoder-only-models-for-consolidating-speech-recognition-synthesis-and-speech-text-continuation-tasks-2309.07937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxtlm-unified-decoder-only-models-for-consolidating-speech-recognition-synthesis-and-speech-text-continuation-tasks-2309.07937"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-face-detection-with-audio-based-region-proposals-for-human-robot-interactions-2309.08005</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-face-detection-with-audio-based-region-proposals-for-human-robot-interactions-2309.08005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-face-detection-with-audio-based-region-proposals-for-human-robot-interactions-2309.08005"/></url>
<url><loc>https://scifaro.com/en/abs/diarist-streaming-speech-translation-with-speaker-diarization-2309.08007</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diarist-streaming-speech-translation-with-speaker-diarization-2309.08007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diarist-streaming-speech-translation-with-speaker-diarization-2309.08007"/></url>
<url><loc>https://scifaro.com/en/abs/usm-scd-multilingual-speaker-change-detection-based-on-large-pretrained-foundation-models-2309.08023</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usm-scd-multilingual-speaker-change-detection-based-on-large-pretrained-foundation-models-2309.08023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usm-scd-multilingual-speaker-change-detection-based-on-large-pretrained-foundation-models-2309.08023"/></url>
<url><loc>https://scifaro.com/en/abs/av2wav-diffusion-based-re-synthesis-from-continuous-self-supervised-features-for-audio-visual-speech-enhancement-2309.08030</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av2wav-diffusion-based-re-synthesis-from-continuous-self-supervised-features-for-audio-visual-speech-enhancement-2309.08030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av2wav-diffusion-based-re-synthesis-from-continuous-self-supervised-features-for-audio-visual-speech-enhancement-2309.08030"/></url>
<url><loc>https://scifaro.com/en/abs/ddsp-sfx-acoustically-guided-sound-effects-generation-with-differentiable-digital-signal-processing-2309.08060</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddsp-sfx-acoustically-guided-sound-effects-generation-with-differentiable-digital-signal-processing-2309.08060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddsp-sfx-acoustically-guided-sound-effects-generation-with-differentiable-digital-signal-processing-2309.08060"/></url>
<url><loc>https://scifaro.com/en/abs/libriheavy-a-50-000-hours-asr-corpus-with-punctuation-casing-and-context-2309.08105</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libriheavy-a-50-000-hours-asr-corpus-with-punctuation-casing-and-context-2309.08105"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libriheavy-a-50-000-hours-asr-corpus-with-punctuation-casing-and-context-2309.08105"/></url>
<url><loc>https://scifaro.com/en/abs/t-sot-fnt-streaming-multi-talker-asr-with-text-only-domain-adaptation-capability-2309.08131</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-sot-fnt-streaming-multi-talker-asr-with-text-only-domain-adaptation-capability-2309.08131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-sot-fnt-streaming-multi-talker-asr-with-text-only-domain-adaptation-capability-2309.08131"/></url>
<url><loc>https://scifaro.com/en/abs/prompttts-controlling-speaker-identity-in-prompt-based-text-to-speech-using-natural-language-descriptions-2309.08140</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompttts-controlling-speaker-identity-in-prompt-based-text-to-speech-using-natural-language-descriptions-2309.08140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompttts-controlling-speaker-identity-in-prompt-based-text-to-speech-using-natural-language-descriptions-2309.08140"/></url>
<url><loc>https://scifaro.com/en/abs/audio-difference-learning-for-audio-captioning-2309.08141</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-difference-learning-for-audio-captioning-2309.08141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-difference-learning-for-audio-captioning-2309.08141"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tune-the-pretrained-atst-model-for-sound-event-detection-2309.08153</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tune-the-pretrained-atst-model-for-sound-event-detection-2309.08153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tune-the-pretrained-atst-model-for-sound-event-detection-2309.08153"/></url>
<url><loc>https://scifaro.com/en/abs/rvae-em-generative-speech-dereverberation-based-on-recurrent-variational-auto-encoder-and-convolutive-transfer-function-2309.08157</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rvae-em-generative-speech-dereverberation-based-on-recurrent-variational-auto-encoder-and-convolutive-transfer-function-2309.08157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rvae-em-generative-speech-dereverberation-based-on-recurrent-variational-auto-encoder-and-convolutive-transfer-function-2309.08157"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-knowledge-distillation-via-flow-based-voice-conversion-for-robust-polyglot-text-to-speech-2309.08255</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-knowledge-distillation-via-flow-based-voice-conversion-for-robust-polyglot-text-to-speech-2309.08255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-knowledge-distillation-via-flow-based-voice-conversion-for-robust-polyglot-text-to-speech-2309.08255"/></url>
<url><loc>https://scifaro.com/en/abs/improving-voice-conversion-for-dissimilar-speakers-using-perceptual-losses-2309.08263</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-voice-conversion-for-dissimilar-speakers-using-perceptual-losses-2309.08263"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-voice-conversion-for-dissimilar-speakers-using-perceptual-losses-2309.08263"/></url>
<url><loc>https://scifaro.com/en/abs/improving-short-utterance-anti-spoofing-with-aasist2-2309.08279</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-short-utterance-anti-spoofing-with-aasist2-2309.08279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-short-utterance-anti-spoofing-with-aasist2-2309.08279"/></url>
<url><loc>https://scifaro.com/en/abs/one-class-knowledge-distillation-for-spoofing-speech-detection-2309.08285</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-class-knowledge-distillation-for-spoofing-speech-detection-2309.08285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-class-knowledge-distillation-for-spoofing-speech-detection-2309.08285"/></url>
<url><loc>https://scifaro.com/en/abs/head-related-transfer-function-interpolation-with-a-spherical-cnn-2309.08290</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/head-related-transfer-function-interpolation-with-a-spherical-cnn-2309.08290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/head-related-transfer-function-interpolation-with-a-spherical-cnn-2309.08290"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dependent-modeling-of-own-voice-transfer-characteristics-for-in-ear-microphones-in-hearables-2309.08294</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dependent-modeling-of-own-voice-transfer-characteristics-for-in-ear-microphones-in-hearables-2309.08294"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dependent-modeling-of-own-voice-transfer-characteristics-for-in-ear-microphones-in-hearables-2309.08294"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-active-speaker-detection-system-integrating-an-audio-visual-signal-with-a-spatial-querying-mechanism-2309.08295</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-active-speaker-detection-system-integrating-an-audio-visual-signal-with-a-spatial-querying-mechanism-2309.08295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-active-speaker-detection-system-integrating-an-audio-visual-signal-with-a-spatial-querying-mechanism-2309.08295"/></url>
<url><loc>https://scifaro.com/en/abs/diff-sv-a-unified-hierarchical-framework-for-noise-robust-speaker-verification-using-score-based-diffusion-probabilistic-models-2309.08320</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-sv-a-unified-hierarchical-framework-for-noise-robust-speaker-verification-using-score-based-diffusion-probabilistic-models-2309.08320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-sv-a-unified-hierarchical-framework-for-noise-robust-speaker-verification-using-score-based-diffusion-probabilistic-models-2309.08320"/></url>
<url><loc>https://scifaro.com/en/abs/the-multimodal-information-based-speech-processing-misp-2023-challenge-audio-visual-target-speaker-extraction-2309.08348</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-multimodal-information-based-speech-processing-misp-2023-challenge-audio-visual-target-speaker-extraction-2309.08348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-multimodal-information-based-speech-processing-misp-2023-challenge-audio-visual-target-speaker-extraction-2309.08348"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-sound-event-detection-with-local-and-global-consistency-regularization-2309.08355</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-sound-event-detection-with-local-and-global-consistency-regularization-2309.08355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-sound-event-detection-with-local-and-global-consistency-regularization-2309.08355"/></url>
<url><loc>https://scifaro.com/en/abs/audio-free-prompt-tuning-for-language-audio-models-2309.08357</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-free-prompt-tuning-for-language-audio-models-2309.08357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-free-prompt-tuning-for-language-audio-models-2309.08357"/></url>
<url><loc>https://scifaro.com/en/abs/diacorrect-error-correction-back-end-for-speaker-diarization-2309.08377</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diacorrect-error-correction-back-end-for-speaker-diarization-2309.08377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diacorrect-error-correction-back-end-for-speaker-diarization-2309.08377"/></url>
<url><loc>https://scifaro.com/en/abs/chunked-attention-based-encoder-decoder-model-for-streaming-speech-recognition-2309.08436</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chunked-attention-based-encoder-decoder-model-for-streaming-speech-recognition-2309.08436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chunked-attention-based-encoder-decoder-model-for-streaming-speech-recognition-2309.08436"/></url>
<url><loc>https://scifaro.com/en/abs/combining-tf-gridnet-and-mixture-encoder-for-continuous-speech-separation-for-meeting-transcription-2309.08454</loc><lastmod>2025-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-tf-gridnet-and-mixture-encoder-for-continuous-speech-separation-for-meeting-transcription-2309.08454"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-tf-gridnet-and-mixture-encoder-for-continuous-speech-separation-for-meeting-transcription-2309.08454"/></url>
<url><loc>https://scifaro.com/en/abs/towards-word-level-end-to-end-neural-speaker-diarization-with-auxiliary-network-2309.08489</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-word-level-end-to-end-neural-speaker-diarization-with-auxiliary-network-2309.08489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-word-level-end-to-end-neural-speaker-diarization-with-auxiliary-network-2309.08489"/></url>
<url><loc>https://scifaro.com/en/abs/open-vocabulary-keyword-spotting-with-adaptive-instance-normalization-2309.08561</loc><lastmod>2023-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-vocabulary-keyword-spotting-with-adaptive-instance-normalization-2309.08561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-vocabulary-keyword-spotting-with-adaptive-instance-normalization-2309.08561"/></url>
<url><loc>https://scifaro.com/en/abs/music-source-separation-based-on-a-lightweight-deep-learning-framework-dttnet-dual-path-tfc-tdf-unet-2309.08684</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-source-separation-based-on-a-lightweight-deep-learning-framework-dttnet-dual-path-tfc-tdf-unet-2309.08684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-source-separation-based-on-a-lightweight-deep-learning-framework-dttnet-dual-path-tfc-tdf-unet-2309.08684"/></url>
<url><loc>https://scifaro.com/en/abs/musilingo-bridging-music-and-text-with-pre-trained-language-models-for-music-captioning-and-query-response-2309.08730</loc><lastmod>2024-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musilingo-bridging-music-and-text-with-pre-trained-language-models-for-music-captioning-and-query-response-2309.08730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musilingo-bridging-music-and-text-with-pre-trained-language-models-for-music-captioning-and-query-response-2309.08730"/></url>
<url><loc>https://scifaro.com/en/abs/stack-and-delay-a-new-codebook-pattern-for-music-generation-2309.08804</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stack-and-delay-a-new-codebook-pattern-for-music-generation-2309.08804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stack-and-delay-a-new-codebook-pattern-for-music-generation-2309.08804"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-end-to-end-multilingual-phoneme-recognition-through-exploiting-universal-speech-attributes-constraints-2309.08828</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-end-to-end-multilingual-phoneme-recognition-through-exploiting-universal-speech-attributes-constraints-2309.08828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-end-to-end-multilingual-phoneme-recognition-through-exploiting-universal-speech-attributes-constraints-2309.08828"/></url>
<url><loc>https://scifaro.com/en/abs/decoder-only-architecture-for-speech-recognition-with-ctc-prompts-and-text-data-augmentation-2309.08876</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoder-only-architecture-for-speech-recognition-with-ctc-prompts-and-text-data-augmentation-2309.08876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoder-only-architecture-for-speech-recognition-with-ctc-prompts-and-text-data-augmentation-2309.08876"/></url>
<url><loc>https://scifaro.com/en/abs/unifying-robustness-and-fidelity-a-comprehensive-study-of-pretrained-generative-methods-for-speech-enhancement-in-adverse-conditions-2309.09028</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unifying-robustness-and-fidelity-a-comprehensive-study-of-pretrained-generative-methods-for-speech-enhancement-in-adverse-conditions-2309.09028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unifying-robustness-and-fidelity-a-comprehensive-study-of-pretrained-generative-methods-for-speech-enhancement-in-adverse-conditions-2309.09028"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speaker-diarization-using-memory-aware-multi-speaker-embedding-with-sequence-to-sequence-architecture-2309.09180</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speaker-diarization-using-memory-aware-multi-speaker-embedding-with-sequence-to-sequence-architecture-2309.09180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speaker-diarization-using-memory-aware-multi-speaker-embedding-with-sequence-to-sequence-architecture-2309.09180"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-inversion-through-self-supervised-embeddings-and-enhanced-tract-variables-2309.09220</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-inversion-through-self-supervised-embeddings-and-enhanced-tract-variables-2309.09220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-inversion-through-self-supervised-embeddings-and-enhanced-tract-variables-2309.09220"/></url>
<url><loc>https://scifaro.com/en/abs/promptvc-flexible-stylistic-voice-conversion-in-latent-space-driven-by-natural-language-prompts-2309.09262</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/promptvc-flexible-stylistic-voice-conversion-in-latent-space-driven-by-natural-language-prompts-2309.09262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/promptvc-flexible-stylistic-voice-conversion-in-latent-space-driven-by-natural-language-prompts-2309.09262"/></url>
<url><loc>https://scifaro.com/en/abs/continuous-modeling-of-the-denoising-process-for-speech-enhancement-based-on-deep-learning-2309.09270</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continuous-modeling-of-the-denoising-process-for-speech-enhancement-based-on-deep-learning-2309.09270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continuous-modeling-of-the-denoising-process-for-speech-enhancement-based-on-deep-learning-2309.09270"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-through-language-prompt-tuning-and-frame-level-language-adapter-2309.09443</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-through-language-prompt-tuning-and-frame-level-language-adapter-2309.09443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-multilingual-speech-recognition-through-language-prompt-tuning-and-frame-level-language-adapter-2309.09443"/></url>
<url><loc>https://scifaro.com/en/abs/hiftnet-a-fast-high-quality-neural-vocoder-with-harmonic-plus-noise-filter-and-inverse-short-time-fourier-transform-2309.09493</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hiftnet-a-fast-high-quality-neural-vocoder-with-harmonic-plus-noise-filter-and-inverse-short-time-fourier-transform-2309.09493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hiftnet-a-fast-high-quality-neural-vocoder-with-harmonic-plus-noise-filter-and-inverse-short-time-fourier-transform-2309.09493"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-superb-towards-a-dynamic-collaborative-and-comprehensive-instruction-tuning-benchmark-for-speech-2309.09510</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-superb-towards-a-dynamic-collaborative-and-comprehensive-instruction-tuning-benchmark-for-speech-2309.09510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-superb-towards-a-dynamic-collaborative-and-comprehensive-instruction-tuning-benchmark-for-speech-2309.09510"/></url>
<url><loc>https://scifaro.com/en/abs/training-dynamic-models-using-early-exits-for-automatic-speech-recognition-on-resource-constrained-devices-2309.09546</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/training-dynamic-models-using-early-exits-for-automatic-speech-recognition-on-resource-constrained-devices-2309.09546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/training-dynamic-models-using-early-exits-for-automatic-speech-recognition-on-resource-constrained-devices-2309.09546"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-aids-using-whisper-and-metadata-2309.09548</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-aids-using-whisper-and-metadata-2309.09548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-speech-intelligibility-prediction-for-hearing-aids-using-whisper-and-metadata-2309.09548"/></url>
<url><loc>https://scifaro.com/en/abs/refining-dnn-based-mask-estimation-using-cgmm-based-em-algorithm-for-multi-channel-noise-reduction-2309.09630</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refining-dnn-based-mask-estimation-using-cgmm-based-em-algorithm-for-multi-channel-noise-reduction-2309.09630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refining-dnn-based-mask-estimation-using-cgmm-based-em-algorithm-for-multi-channel-noise-reduction-2309.09630"/></url>
<url><loc>https://scifaro.com/en/abs/single-and-few-step-diffusion-for-generative-speech-enhancement-2309.09677</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-and-few-step-diffusion-for-generative-speech-enhancement-2309.09677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-and-few-step-diffusion-for-generative-speech-enhancement-2309.09677"/></url>
<url><loc>https://scifaro.com/en/abs/recap-retrieval-augmented-audio-captioning-2309.09836</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recap-retrieval-augmented-audio-captioning-2309.09836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recap-retrieval-augmented-audio-captioning-2309.09836"/></url>
<url><loc>https://scifaro.com/en/abs/distilling-hubert-with-lstms-via-decoupled-knowledge-distillation-2309.09920</loc><lastmod>2023-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distilling-hubert-with-lstms-via-decoupled-knowledge-distillation-2309.09920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distilling-hubert-with-lstms-via-decoupled-knowledge-distillation-2309.09920"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-end-to-end-asr-architectures-for-long-form-audio-transcription-2309.09950</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-end-to-end-asr-architectures-for-long-form-audio-transcription-2309.09950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-end-to-end-asr-architectures-for-long-form-audio-transcription-2309.09950"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-recognition-for-african-american-english-with-audio-classification-2309.09996</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-recognition-for-african-american-english-with-audio-classification-2309.09996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-recognition-for-african-american-english-with-audio-classification-2309.09996"/></url>
<url><loc>https://scifaro.com/en/abs/htec-human-transcription-error-correction-2309.10089</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/htec-human-transcription-error-correction-2309.10089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/htec-human-transcription-error-correction-2309.10089"/></url>
<url><loc>https://scifaro.com/en/abs/using-fine-tuning-and-min-lookahead-beam-search-to-improve-whisper-2309.10299</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-fine-tuning-and-min-lookahead-beam-search-to-improve-whisper-2309.10299"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-fine-tuning-and-min-lookahead-beam-search-to-improve-whisper-2309.10299"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-2309.10455</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-2309.10455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-ultrasound-tongue-images-for-audio-visual-speech-enhancement-2309.10455"/></url>
<url><loc>https://scifaro.com/en/abs/harnessing-the-zero-shot-power-of-instruction-tuned-large-language-model-in-end-to-end-speech-recognition-2309.10524</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harnessing-the-zero-shot-power-of-instruction-tuned-large-language-model-in-end-to-end-speech-recognition-2309.10524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harnessing-the-zero-shot-power-of-instruction-tuned-large-language-model-in-end-to-end-speech-recognition-2309.10524"/></url>
<url><loc>https://scifaro.com/en/abs/foleygen-visually-guided-audio-generation-2309.10537</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foleygen-visually-guided-audio-generation-2309.10537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foleygen-visually-guided-audio-generation-2309.10537"/></url>
<url><loc>https://scifaro.com/en/abs/an-active-noise-control-system-based-on-soundfield-interpolation-using-a-physics-informed-neural-network-2309.10605</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-active-noise-control-system-based-on-soundfield-interpolation-using-a-physics-informed-neural-network-2309.10605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-active-noise-control-system-based-on-soundfield-interpolation-using-a-physics-informed-neural-network-2309.10605"/></url>
<url><loc>https://scifaro.com/en/abs/corpus-synthesis-for-zero-shot-asr-domain-adaptation-using-large-language-models-2309.10707</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/corpus-synthesis-for-zero-shot-asr-domain-adaptation-using-large-language-models-2309.10707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/corpus-synthesis-for-zero-shot-asr-domain-adaptation-using-large-language-models-2309.10707"/></url>
<url><loc>https://scifaro.com/en/abs/av-superb-a-multi-task-evaluation-benchmark-for-audio-visual-representation-models-2309.10787</loc><lastmod>2024-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-superb-a-multi-task-evaluation-benchmark-for-audio-visual-representation-models-2309.10787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-superb-a-multi-task-evaluation-benchmark-for-audio-visual-representation-models-2309.10787"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speech-enhancement-for-low-resource-speech-synthesis-2309.10795</loc><lastmod>2023-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speech-enhancement-for-low-resource-speech-synthesis-2309.10795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speech-enhancement-for-low-resource-speech-synthesis-2309.10795"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-contextualization-with-large-language-models-2309.10917</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-contextualization-with-large-language-models-2309.10917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-contextualization-with-large-language-models-2309.10917"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-audio-representation-as-an-alternative-to-mel-spectrograms-for-speaker-and-speech-recognition-2309.10922</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-audio-representation-as-an-alternative-to-mel-spectrograms-for-speaker-and-speech-recognition-2309.10922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-audio-representation-as-an-alternative-to-mel-spectrograms-for-speaker-and-speech-recognition-2309.10922"/></url>
<url><loc>https://scifaro.com/en/abs/ensembling-multilingual-pre-trained-models-for-predicting-multi-label-regression-emotion-share-from-speech-2309.11014</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ensembling-multilingual-pre-trained-models-for-predicting-multi-label-regression-emotion-share-from-speech-2309.11014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ensembling-multilingual-pre-trained-models-for-predicting-multi-label-regression-emotion-share-from-speech-2309.11014"/></url>
<url><loc>https://scifaro.com/en/abs/deep-complex-u-net-with-conformer-for-audio-visual-speech-enhancement-2309.11059</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-complex-u-net-with-conformer-for-audio-visual-speech-enhancement-2309.11059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-complex-u-net-with-conformer-for-audio-visual-speech-enhancement-2309.11059"/></url>
<url><loc>https://scifaro.com/en/abs/speak-while-you-think-streaming-speech-synthesis-during-text-generation-2309.11210</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-while-you-think-streaming-speech-synthesis-during-text-generation-2309.11210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-while-you-think-streaming-speech-synthesis-during-text-generation-2309.11210"/></url>
<url><loc>https://scifaro.com/en/abs/joint-minimum-processing-beamforming-and-near-end-listening-enhancement-2309.11243</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-minimum-processing-beamforming-and-near-end-listening-enhancement-2309.11243"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-minimum-processing-beamforming-and-near-end-listening-enhancement-2309.11243"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-data-collection-and-unsupervised-learning-for-code-switched-tunisian-arabic-automatic-speech-recognition-2309.11327</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-data-collection-and-unsupervised-learning-for-code-switched-tunisian-arabic-automatic-speech-recognition-2309.11327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-data-collection-and-unsupervised-learning-for-code-switched-tunisian-arabic-automatic-speech-recognition-2309.11327"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-tts-system-with-parallel-prosody-transfer-from-unseen-speakers-2309.11487</loc><lastmod>2023-09-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-tts-system-with-parallel-prosody-transfer-from-unseen-speakers-2309.11487"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-tts-system-with-parallel-prosody-transfer-from-unseen-speakers-2309.11487"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-in-the-wild-data-for-effective-self-supervised-pretraining-in-speaker-recognition-2309.11730</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-in-the-wild-data-for-effective-self-supervised-pretraining-in-speaker-recognition-2309.11730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-in-the-wild-data-for-effective-self-supervised-pretraining-in-speaker-recognition-2309.11730"/></url>
<url><loc>https://scifaro.com/en/abs/sparsely-shared-lora-on-whisper-for-child-speech-recognition-2309.11756</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsely-shared-lora-on-whisper-for-child-speech-recognition-2309.11756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsely-shared-lora-on-whisper-for-child-speech-recognition-2309.11756"/></url>
<url><loc>https://scifaro.com/en/abs/comflp-correlation-measure-based-fast-search-on-asr-layer-pruning-2309.11768</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comflp-correlation-measure-based-fast-search-on-asr-layer-pruning-2309.11768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comflp-correlation-measure-based-fast-search-on-asr-layer-pruning-2309.11768"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-silence-on-speech-anti-spoofing-2309.11827</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-silence-on-speech-anti-spoofing-2309.11827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-silence-on-speech-anti-spoofing-2309.11827"/></url>
<url><loc>https://scifaro.com/en/abs/cluster-based-pruning-techniques-for-audio-data-2309.11922</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cluster-based-pruning-techniques-for-audio-data-2309.11922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cluster-based-pruning-techniques-for-audio-data-2309.11922"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-mosra-mean-opinion-score-and-room-acoustics-estimation-using-simulated-data-and-a-teacher-model-2309.11976</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-mosra-mean-opinion-score-and-room-acoustics-estimation-using-simulated-data-and-a-teacher-model-2309.11976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-mosra-mean-opinion-score-and-room-acoustics-estimation-using-simulated-data-and-a-teacher-model-2309.11976"/></url>
<url><loc>https://scifaro.com/en/abs/is-the-ideal-ratio-mask-really-the-best-exploring-the-best-extraction-performance-and-optimal-mask-of-mask-based-beamformers-2309.12065</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-the-ideal-ratio-mask-really-the-best-exploring-the-best-extraction-performance-and-optimal-mask-of-mask-based-beamformers-2309.12065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-the-ideal-ratio-mask-really-the-best-exploring-the-best-extraction-performance-and-optimal-mask-of-mask-based-beamformers-2309.12065"/></url>
<url><loc>https://scifaro.com/en/abs/a-multiscale-autoencoder-msae-framework-for-end-to-end-neural-network-speech-enhancement-2309.12121</loc><lastmod>2023-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multiscale-autoencoder-msae-framework-for-end-to-end-neural-network-speech-enhancement-2309.12121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multiscale-autoencoder-msae-framework-for-end-to-end-neural-network-speech-enhancement-2309.12121"/></url>
<url><loc>https://scifaro.com/en/abs/icassp-2023-acoustic-echo-cancellation-challenge-2309.12553</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icassp-2023-acoustic-echo-cancellation-challenge-2309.12553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icassp-2023-acoustic-echo-cancellation-challenge-2309.12553"/></url>
<url><loc>https://scifaro.com/en/abs/sampling-frequency-independent-universal-sound-separation-2309.12581</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sampling-frequency-independent-universal-sound-separation-2309.12581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sampling-frequency-independent-universal-sound-separation-2309.12581"/></url>
<url><loc>https://scifaro.com/en/abs/spgm-prioritizing-local-features-for-enhanced-speech-separation-performance-2309.12608</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spgm-prioritizing-local-features-for-enhanced-speech-separation-performance-2309.12608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spgm-prioritizing-local-features-for-enhanced-speech-separation-performance-2309.12608"/></url>
<url><loc>https://scifaro.com/en/abs/ntt-speaker-diarization-system-for-chime-7-multi-domain-multi-microphone-end-to-end-and-vector-clustering-diarization-2309.12656</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ntt-speaker-diarization-system-for-chime-7-multi-domain-multi-microphone-end-to-end-and-vector-clustering-diarization-2309.12656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ntt-speaker-diarization-system-for-chime-7-multi-domain-multi-microphone-end-to-end-and-vector-clustering-diarization-2309.12656"/></url>
<url><loc>https://scifaro.com/en/abs/big-model-only-for-hard-audios-sample-dependent-whisper-model-selection-for-efficient-inferences-2309.12712</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/big-model-only-for-hard-audios-sample-dependent-whisper-model-selection-for-efficient-inferences-2309.12712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/big-model-only-for-hard-audios-sample-dependent-whisper-model-selection-for-efficient-inferences-2309.12712"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-representations-improve-supervised-learning-in-speech-emotion-recognition-2309.12714</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-representations-improve-supervised-learning-in-speech-emotion-recognition-2309.12714"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-representations-improve-supervised-learning-in-speech-emotion-recognition-2309.12714"/></url>
<url><loc>https://scifaro.com/en/abs/reduce-reuse-recycle-is-perturbed-data-better-than-other-language-augmentation-for-low-resource-self-supervised-speech-models-2309.12763</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduce-reuse-recycle-is-perturbed-data-better-than-other-language-augmentation-for-low-resource-self-supervised-speech-models-2309.12763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduce-reuse-recycle-is-perturbed-data-better-than-other-language-augmentation-for-low-resource-self-supervised-speech-models-2309.12763"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-incorporating-whisper-for-robust-speech-assessment-2309.12766</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-incorporating-whisper-for-robust-speech-assessment-2309.12766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-incorporating-whisper-for-robust-speech-assessment-2309.12766"/></url>
<url><loc>https://scifaro.com/en/abs/durian-e-duration-informed-attention-network-for-expressive-text-to-speech-synthesis-2309.12792</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/durian-e-duration-informed-attention-network-for-expressive-text-to-speech-synthesis-2309.12792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/durian-e-duration-informed-attention-network-for-expressive-text-to-speech-synthesis-2309.12792"/></url>
<url><loc>https://scifaro.com/en/abs/vic-kd-variance-invariance-covariance-knowledge-distillation-to-make-keyword-spotting-more-robust-against-adversarial-attacks-2309.12914</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vic-kd-variance-invariance-covariance-knowledge-distillation-to-make-keyword-spotting-more-robust-against-adversarial-attacks-2309.12914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vic-kd-variance-invariance-covariance-knowledge-distillation-to-make-keyword-spotting-more-robust-against-adversarial-attacks-2309.12914"/></url>
<url><loc>https://scifaro.com/en/abs/massive-end-to-end-models-for-short-search-queries-2309.12963</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/massive-end-to-end-models-for-short-search-queries-2309.12963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/massive-end-to-end-models-for-short-search-queries-2309.12963"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-asr-pathways-an-adaptive-masking-approach-towards-efficient-pruning-of-a-multilingual-asr-model-2309.13018</loc><lastmod>2025-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-asr-pathways-an-adaptive-masking-approach-towards-efficient-pruning-of-a-multilingual-asr-model-2309.13018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-asr-pathways-an-adaptive-masking-approach-towards-efficient-pruning-of-a-multilingual-asr-model-2309.13018"/></url>
<url><loc>https://scifaro.com/en/abs/memory-augmented-conformer-for-improved-end-to-end-long-form-asr-2309.13029</loc><lastmod>2023-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-augmented-conformer-for-improved-end-to-end-long-form-asr-2309.13029"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-augmented-conformer-for-improved-end-to-end-long-form-asr-2309.13029"/></url>
<url><loc>https://scifaro.com/en/abs/importance-of-smoothness-induced-by-optimizers-in-fl4asr-towards-understanding-federated-learning-for-end-to-end-asr-2309.13102</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/importance-of-smoothness-induced-by-optimizers-in-fl4asr-towards-understanding-federated-learning-for-end-to-end-asr-2309.13102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/importance-of-smoothness-induced-by-optimizers-in-fl4asr-towards-understanding-federated-learning-for-end-to-end-asr-2309.13102"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-speaker-embedding-with-sequential-disentanglement-2309.13253</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-speaker-embedding-with-sequential-disentanglement-2309.13253"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-speaker-embedding-with-sequential-disentanglement-2309.13253"/></url>
<url><loc>https://scifaro.com/en/abs/attention-is-all-you-need-for-blind-room-volume-estimation-2309.13504</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-is-all-you-need-for-blind-room-volume-estimation-2309.13504"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-is-all-you-need-for-blind-room-volume-estimation-2309.13504"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-frequency-domain-auto-regressive-modeling-2309.13537</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-frequency-domain-auto-regressive-modeling-2309.13537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-frequency-domain-auto-regressive-modeling-2309.13537"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-black-box-speaker-verification-model-adaptation-with-reprogramming-and-backend-learning-2309.13605</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-black-box-speaker-verification-model-adaptation-with-reprogramming-and-backend-learning-2309.13605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-black-box-speaker-verification-model-adaptation-with-reprogramming-and-backend-learning-2309.13605"/></url>
<url><loc>https://scifaro.com/en/abs/cross-modal-alignment-with-optimal-transport-for-ctc-based-asr-2309.13650</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-modal-alignment-with-optimal-transport-for-ctc-based-asr-2309.13650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-modal-alignment-with-optimal-transport-for-ctc-based-asr-2309.13650"/></url>
<url><loc>https://scifaro.com/en/abs/voiceldm-text-to-speech-with-environmental-context-2309.13664</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voiceldm-text-to-speech-with-environmental-context-2309.13664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voiceldm-text-to-speech-with-environmental-context-2309.13664"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-step-approach-for-narrowband-source-localization-in-reverberant-rooms-2309.13819</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-step-approach-for-narrowband-source-localization-in-reverberant-rooms-2309.13819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-step-approach-for-narrowband-source-localization-in-reverberant-rooms-2309.13819"/></url>
<url><loc>https://scifaro.com/en/abs/ddtse-discriminative-diffusion-model-for-target-speech-extraction-2309.13874</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddtse-discriminative-diffusion-model-for-target-speech-extraction-2309.13874"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddtse-discriminative-diffusion-model-for-target-speech-extraction-2309.13874"/></url>
<url><loc>https://scifaro.com/en/abs/autoprep-an-automatic-preprocessing-framework-for-in-the-wild-speech-data-2309.13905</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoprep-an-automatic-preprocessing-framework-for-in-the-wild-speech-data-2309.13905"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoprep-an-automatic-preprocessing-framework-for-in-the-wild-speech-data-2309.13905"/></url>
<url><loc>https://scifaro.com/en/abs/frame-wise-streaming-end-to-end-speaker-diarization-with-non-autoregressive-self-attention-based-attractors-2309.13916</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-wise-streaming-end-to-end-speaker-diarization-with-non-autoregressive-self-attention-based-attractors-2309.13916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-wise-streaming-end-to-end-speaker-diarization-with-non-autoregressive-self-attention-based-attractors-2309.13916"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-classification-systems-against-soft-labels-with-fuzzy-precision-and-recall-2309.13938</loc><lastmod>2023-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-classification-systems-against-soft-labels-with-fuzzy-precision-and-recall-2309.13938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-classification-systems-against-soft-labels-with-fuzzy-precision-and-recall-2309.13938"/></url>
<url><loc>https://scifaro.com/en/abs/connecting-speech-encoder-and-large-language-model-for-asr-2309.13963</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/connecting-speech-encoder-and-large-language-model-for-asr-2309.13963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/connecting-speech-encoder-and-large-language-model-for-asr-2309.13963"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-accent-adaptation-through-masked-language-model-correction-of-discrete-self-supervised-speech-units-2309.13994</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-accent-adaptation-through-masked-language-model-correction-of-discrete-self-supervised-speech-units-2309.13994"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-accent-adaptation-through-masked-language-model-correction-of-discrete-self-supervised-speech-units-2309.13994"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-detection-of-pathological-voice-using-glottal-source-features-2309.14080</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-detection-of-pathological-voice-using-glottal-source-features-2309.14080"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-detection-of-pathological-voice-using-glottal-source-features-2309.14080"/></url>
<url><loc>https://scifaro.com/en/abs/bisinger-bilingual-singing-voice-synthesis-2309.14089</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bisinger-bilingual-singing-voice-synthesis-2309.14089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bisinger-bilingual-singing-voice-synthesis-2309.14089"/></url>
<url><loc>https://scifaro.com/en/abs/wav2vec-based-detection-and-severity-level-classification-of-dysarthria-from-speech-2309.14107</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2vec-based-detection-and-severity-level-classification-of-dysarthria-from-speech-2309.14107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2vec-based-detection-and-severity-level-classification-of-dysarthria-from-speech-2309.14107"/></url>
<url><loc>https://scifaro.com/en/abs/haha-pod-an-attempt-for-laughter-based-non-verbal-speaker-verification-2309.14109</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/haha-pod-an-attempt-for-laughter-based-non-verbal-speaker-verification-2309.14109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/haha-pod-an-attempt-for-laughter-based-non-verbal-speaker-verification-2309.14109"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-anonymization-using-neural-audio-codec-language-models-2309.14129</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-anonymization-using-neural-audio-codec-language-models-2309.14129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-anonymization-using-neural-audio-codec-language-models-2309.14129"/></url>
<url><loc>https://scifaro.com/en/abs/towards-general-purpose-text-instruction-guided-voice-conversion-2309.14324</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-general-purpose-text-instruction-guided-voice-conversion-2309.14324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-general-purpose-text-instruction-guided-voice-conversion-2309.14324"/></url>
<url><loc>https://scifaro.com/en/abs/online-active-learning-for-sound-event-detection-2309.14460</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-active-learning-for-sound-event-detection-2309.14460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-active-learning-for-sound-event-detection-2309.14460"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-impact-of-quantization-and-pruning-of-self-supervised-speech-models-for-downstream-speech-recognition-tasks-in-the-wild-2309.14462</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-impact-of-quantization-and-pruning-of-self-supervised-speech-models-for-downstream-speech-recognition-tasks-in-the-wild-2309.14462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-impact-of-quantization-and-pruning-of-self-supervised-speech-models-for-downstream-speech-recognition-tasks-in-the-wild-2309.14462"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-dsp-assisted-neural-pitch-estimation-with-very-low-complexity-2309.14507</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-dsp-assisted-neural-pitch-estimation-with-very-low-complexity-2309.14507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-dsp-assisted-neural-pitch-estimation-with-very-low-complexity-2309.14507"/></url>
<url><loc>https://scifaro.com/en/abs/nolace-improving-low-complexity-speech-codec-enhancement-through-adaptive-temporal-shaping-2309.14521</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nolace-improving-low-complexity-speech-codec-enhancement-through-adaptive-temporal-shaping-2309.14521"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nolace-improving-low-complexity-speech-codec-enhancement-through-adaptive-temporal-shaping-2309.14521"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-session-variability-leveraging-session-embeddings-for-session-robustness-in-speaker-verification-2309.14741</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-session-variability-leveraging-session-embeddings-for-session-robustness-in-speaker-verification-2309.14741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-session-variability-leveraging-session-embeddings-for-session-robustness-in-speaker-verification-2309.14741"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-rwkv-for-memory-efficient-and-low-latency-streaming-asr-2309.14758</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-rwkv-for-memory-efficient-and-low-latency-streaming-asr-2309.14758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-rwkv-for-memory-efficient-and-low-latency-streaming-asr-2309.14758"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-techniques-for-a-physical-model-of-human-vocalisation-2309.14761</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-techniques-for-a-physical-model-of-human-vocalisation-2309.14761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-techniques-for-a-physical-model-of-human-vocalisation-2309.14761"/></url>
<url><loc>https://scifaro.com/en/abs/segment-level-vectorized-beam-search-based-on-partially-autoregressive-inference-2309.14922</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segment-level-vectorized-beam-search-based-on-partially-autoregressive-inference-2309.14922"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segment-level-vectorized-beam-search-based-on-partially-autoregressive-inference-2309.14922"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneously-learning-speaker-s-direction-and-head-orientation-from-binaural-recordings-2309.15064</loc><lastmod>2023-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneously-learning-speaker-s-direction-and-head-orientation-from-binaural-recordings-2309.15064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneously-learning-speaker-s-direction-and-head-orientation-from-binaural-recordings-2309.15064"/></url>
<url><loc>https://scifaro.com/en/abs/collaborative-watermarking-for-adversarial-speech-synthesis-2309.15224</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collaborative-watermarking-for-adversarial-speech-synthesis-2309.15224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collaborative-watermarking-for-adversarial-speech-synthesis-2309.15224"/></url>
<url><loc>https://scifaro.com/en/abs/dualvc-2-dynamic-masked-convolution-for-unified-streaming-and-non-streaming-voice-conversion-2309.15496</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualvc-2-dynamic-masked-convolution-for-unified-streaming-and-non-streaming-voice-conversion-2309.15496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualvc-2-dynamic-masked-convolution-for-unified-streaming-and-non-streaming-voice-conversion-2309.15496"/></url>
<url><loc>https://scifaro.com/en/abs/why-do-angular-margin-losses-work-well-for-semi-supervised-anomalous-sound-detection-2309.15643</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-do-angular-margin-losses-work-well-for-semi-supervised-anomalous-sound-detection-2309.15643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-do-angular-margin-losses-work-well-for-semi-supervised-anomalous-sound-detection-2309.15643"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-trap-a-low-resource-framework-for-instrument-agnostic-music-transcription-2309.15717</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-trap-a-low-resource-framework-for-instrument-agnostic-music-transcription-2309.15717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-trap-a-low-resource-framework-for-instrument-agnostic-music-transcription-2309.15717"/></url>
<url><loc>https://scifaro.com/en/abs/learning-from-flawed-data-weakly-supervised-automatic-speech-recognition-2309.15796</loc><lastmod>2023-09-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-from-flawed-data-weakly-supervised-automatic-speech-recognition-2309.15796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-from-flawed-data-weakly-supervised-automatic-speech-recognition-2309.15796"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-self-supervised-contrastive-learning-of-spatial-sound-event-representation-2309.15938</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-self-supervised-contrastive-learning-of-spatial-sound-event-representation-2309.15938"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-self-supervised-contrastive-learning-of-spatial-sound-event-representation-2309.15938"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-voice-trigger-detection-based-on-transform-average-concatenate-2309.16036</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-voice-trigger-detection-based-on-transform-average-concatenate-2309.16036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-voice-trigger-detection-based-on-transform-average-concatenate-2309.16036"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-acoustic-howling-suppression-through-recursive-training-of-neural-networks-2309.16048</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-acoustic-howling-suppression-through-recursive-training-of-neural-networks-2309.16048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-acoustic-howling-suppression-through-recursive-training-of-neural-networks-2309.16048"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-augmented-kalman-filter-for-robust-acoustic-howling-suppression-2309.16049</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-augmented-kalman-filter-for-robust-acoustic-howling-suppression-2309.16049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-augmented-kalman-filter-for-robust-acoustic-howling-suppression-2309.16049"/></url>
<url><loc>https://scifaro.com/en/abs/does-single-channel-speech-enhancement-improve-keyword-spotting-accuracy-a-case-study-2309.16060</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/does-single-channel-speech-enhancement-improve-keyword-spotting-accuracy-a-case-study-2309.16060"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/does-single-channel-speech-enhancement-improve-keyword-spotting-accuracy-a-case-study-2309.16060"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-cross-modality-knowledge-transfer-with-sinkhorn-attention-for-ctc-based-asr-2309.16093</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-cross-modality-knowledge-transfer-with-sinkhorn-attention-for-ctc-based-asr-2309.16093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-cross-modality-knowledge-transfer-with-sinkhorn-attention-for-ctc-based-asr-2309.16093"/></url>
<url><loc>https://scifaro.com/en/abs/pp-met-a-real-world-personalized-prompt-based-meeting-transcription-system-2309.16247</loc><lastmod>2023-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pp-met-a-real-world-personalized-prompt-based-meeting-transcription-system-2309.16247"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pp-met-a-real-world-personalized-prompt-based-meeting-transcription-system-2309.16247"/></url>
<url><loc>https://scifaro.com/en/abs/meeting-recognition-with-continuous-speech-separation-and-transcription-supported-diarization-2309.16482</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meeting-recognition-with-continuous-speech-separation-and-transcription-supported-diarization-2309.16482"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meeting-recognition-with-continuous-speech-separation-and-transcription-supported-diarization-2309.16482"/></url>
<url><loc>https://scifaro.com/en/abs/towards-high-resolution-weather-monitoring-with-sound-data-2309.16867</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-high-resolution-weather-monitoring-with-sound-data-2309.16867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-high-resolution-weather-monitoring-with-sound-data-2309.16867"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-code-switching-speech-recognition-with-interactive-language-biases-2309.16953</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-code-switching-speech-recognition-with-interactive-language-biases-2309.16953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-code-switching-speech-recognition-with-interactive-language-biases-2309.16953"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-speech-detection-based-on-temporal-consistency-and-distribution-of-speaker-features-2309.16954</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-speech-detection-based-on-temporal-consistency-and-distribution-of-speaker-features-2309.16954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-speech-detection-based-on-temporal-consistency-and-distribution-of-speaker-features-2309.16954"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-self-supervised-learning-with-ssl-enhanced-tts-2309.17020</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-self-supervised-learning-with-ssl-enhanced-tts-2309.17020"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-self-supervised-learning-with-ssl-enhanced-tts-2309.17020"/></url>
<url><loc>https://scifaro.com/en/abs/wiki-en-asr-adapt-large-scale-synthetic-dataset-for-english-asr-customization-2309.17267</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wiki-en-asr-adapt-large-scale-synthetic-dataset-for-english-asr-customization-2309.17267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wiki-en-asr-adapt-large-scale-synthetic-dataset-for-english-asr-customization-2309.17267"/></url>
<url><loc>https://scifaro.com/en/abs/lrpd-large-replay-parallel-dataset-2309.17298</loc><lastmod>2023-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lrpd-large-replay-parallel-dataset-2309.17298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lrpd-large-replay-parallel-dataset-2309.17298"/></url>
<url><loc>https://scifaro.com/en/abs/toward-universal-speech-enhancement-for-diverse-input-conditions-2309.17384</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-universal-speech-enhancement-for-diverse-input-conditions-2309.17384"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-universal-speech-enhancement-for-diverse-input-conditions-2309.17384"/></url>
<url><loc>https://scifaro.com/en/abs/mechatronic-generation-of-datasets-for-acoustics-research-2310.00587</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mechatronic-generation-of-datasets-for-acoustics-research-2310.00587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mechatronic-generation-of-datasets-for-acoustics-research-2310.00587"/></url>
<url><loc>https://scifaro.com/en/abs/wavelet-scattering-transform-for-improving-generalization-in-low-resourced-spoken-language-identification-2310.00602</loc><lastmod>2023-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavelet-scattering-transform-for-improving-generalization-in-low-resourced-spoken-language-identification-2310.00602"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavelet-scattering-transform-for-improving-generalization-in-low-resourced-spoken-language-identification-2310.00602"/></url>
<url><loc>https://scifaro.com/en/abs/a-fused-deep-denoising-sound-coding-strategy-for-bilateral-cochlear-implants-2310.01122</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-fused-deep-denoising-sound-coding-strategy-for-bilateral-cochlear-implants-2310.01122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-fused-deep-denoising-sound-coding-strategy-for-bilateral-cochlear-implants-2310.01122"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-voice-and-content-with-self-supervision-for-speaker-recognition-2310.01128</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-voice-and-content-with-self-supervision-for-speaker-recognition-2310.01128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-voice-and-content-with-self-supervision-for-speaker-recognition-2310.01128"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-up-music-information-retrieval-training-with-semi-supervised-learning-2310.01353</loc><lastmod>2023-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-up-music-information-retrieval-training-with-semi-supervised-learning-2310.01353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-up-music-information-retrieval-training-with-semi-supervised-learning-2310.01353"/></url>
<url><loc>https://scifaro.com/en/abs/one-model-to-rule-them-all-towards-end-to-end-joint-speaker-diarization-and-speech-recognition-2310.01688</loc><lastmod>2023-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-model-to-rule-them-all-towards-end-to-end-joint-speaker-diarization-and-speech-recognition-2310.01688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-model-to-rule-them-all-towards-end-to-end-joint-speaker-diarization-and-speech-recognition-2310.01688"/></url>
<url><loc>https://scifaro.com/en/abs/preserving-phonemic-distinctions-for-ordinal-regression-a-novel-loss-function-for-automatic-pronunciation-assessment-2310.01839</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preserving-phonemic-distinctions-for-ordinal-regression-a-novel-loss-function-for-automatic-pronunciation-assessment-2310.01839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preserving-phonemic-distinctions-for-ordinal-regression-a-novel-loss-function-for-automatic-pronunciation-assessment-2310.01839"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-child-adult-speaker-classification-in-dyadic-interactions-2310.01867</loc><lastmod>2025-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-child-adult-speaker-classification-in-dyadic-interactions-2310.01867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-child-adult-speaker-classification-in-dyadic-interactions-2310.01867"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-continuous-speech-emotion-recognition-in-real-life-customer-service-call-center-conversations-2310.02281</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-continuous-speech-emotion-recognition-in-real-life-customer-service-call-center-conversations-2310.02281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-continuous-speech-emotion-recognition-in-real-life-customer-service-call-center-conversations-2310.02281"/></url>
<url><loc>https://scifaro.com/en/abs/the-voicemos-challenge-2023-zero-shot-subjective-speech-quality-prediction-for-multiple-domains-2310.02640</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voicemos-challenge-2023-zero-shot-subjective-speech-quality-prediction-for-multiple-domains-2310.02640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voicemos-challenge-2023-zero-shot-subjective-speech-quality-prediction-for-multiple-domains-2310.02640"/></url>
<url><loc>https://scifaro.com/en/abs/continual-contrastive-spoken-language-understanding-2310.02699</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-contrastive-spoken-language-understanding-2310.02699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-contrastive-spoken-language-understanding-2310.02699"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-training-of-vbx-diarization-2310.02732</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-training-of-vbx-diarization-2310.02732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-training-of-vbx-diarization-2310.02732"/></url>
<url><loc>https://scifaro.com/en/abs/vits-based-singing-voice-conversion-leveraging-whisper-and-multi-scale-f0-modeling-2310.02802</loc><lastmod>2023-10-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vits-based-singing-voice-conversion-leveraging-whisper-and-multi-scale-f0-modeling-2310.02802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vits-based-singing-voice-conversion-leveraging-whisper-and-multi-scale-f0-modeling-2310.02802"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-and-adapter-tuning-for-self-supervised-encoder-decoder-speech-model-2310.02971</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-and-adapter-tuning-for-self-supervised-encoder-decoder-speech-model-2310.02971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-and-adapter-tuning-for-self-supervised-encoder-decoder-speech-model-2310.02971"/></url>
<url><loc>https://scifaro.com/en/abs/zero-resource-code-switched-speech-benchmark-using-speech-utterance-pairs-for-multiple-spoken-languages-2310.03018</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-resource-code-switched-speech-benchmark-using-speech-utterance-pairs-for-multiple-spoken-languages-2310.03018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-resource-code-switched-speech-benchmark-using-speech-utterance-pairs-for-multiple-spoken-languages-2310.03018"/></url>
<url><loc>https://scifaro.com/en/abs/vasab-the-variable-size-adaptive-information-bottleneck-for-disentanglement-on-speech-and-singing-voice-2310.03444</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vasab-the-variable-size-adaptive-information-bottleneck-for-disentanglement-on-speech-and-singing-voice-2310.03444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vasab-the-variable-size-adaptive-information-bottleneck-for-disentanglement-on-speech-and-singing-voice-2310.03444"/></url>
<url><loc>https://scifaro.com/en/abs/performance-and-energy-balance-a-comprehensive-study-of-state-of-the-art-sound-event-detection-systems-2310.03455</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-and-energy-balance-a-comprehensive-study-of-state-of-the-art-sound-event-detection-systems-2310.03455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-and-energy-balance-a-comprehensive-study-of-state-of-the-art-sound-event-detection-systems-2310.03455"/></url>
<url><loc>https://scifaro.com/en/abs/the-icassp-sp-cadenza-challenge-music-demixing-remixing-for-hearing-aids-2310.03480</loc><lastmod>2025-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-icassp-sp-cadenza-challenge-music-demixing-remixing-for-hearing-aids-2310.03480"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-icassp-sp-cadenza-challenge-music-demixing-remixing-for-hearing-aids-2310.03480"/></url>
<url><loc>https://scifaro.com/en/abs/latent-filling-latent-space-data-augmentation-for-zero-shot-speech-synthesis-2310.03538</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-filling-latent-space-data-augmentation-for-zero-shot-speech-synthesis-2310.03538"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-filling-latent-space-data-augmentation-for-zero-shot-speech-synthesis-2310.03538"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-localization-using-direct-path-dominance-test-based-on-sound-field-directivity-2310.03688</loc><lastmod>2023-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-localization-using-direct-path-dominance-test-based-on-sound-field-directivity-2310.03688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-localization-using-direct-path-dominance-test-based-on-sound-field-directivity-2310.03688"/></url>
<url><loc>https://scifaro.com/en/abs/audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2310.03889</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2310.03889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-event-relational-graph-representation-learning-for-acoustic-scene-classification-2310.03889"/></url>
<url><loc>https://scifaro.com/en/abs/challenges-and-insights-exploring-3d-spatial-features-and-complex-networks-on-the-misp-dataset-2310.03901</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/challenges-and-insights-exploring-3d-spatial-features-and-complex-networks-on-the-misp-dataset-2310.03901"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/challenges-and-insights-exploring-3d-spatial-features-and-complex-networks-on-the-misp-dataset-2310.03901"/></url>
<url><loc>https://scifaro.com/en/abs/a-privacy-preserving-method-using-secret-key-for-convolutional-neural-network-based-speech-classification-2310.04035</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-privacy-preserving-method-using-secret-key-for-convolutional-neural-network-based-speech-classification-2310.04035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-privacy-preserving-method-using-secret-key-for-convolutional-neural-network-based-speech-classification-2310.04035"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-sampling-and-beamforming-for-spherical-microphone-arrays-2310.04169</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-sampling-and-beamforming-for-spherical-microphone-arrays-2310.04169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-sampling-and-beamforming-for-spherical-microphone-arrays-2310.04169"/></url>
<url><loc>https://scifaro.com/en/abs/zones-of-quiet-in-a-broadband-diffuse-sound-field-2310.04191</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zones-of-quiet-in-a-broadband-diffuse-sound-field-2310.04191"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zones-of-quiet-in-a-broadband-diffuse-sound-field-2310.04191"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-model-based-beamforming-and-independent-steering-for-spherical-loudspeaker-arrays-2310.04202</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-model-based-beamforming-and-independent-steering-for-spherical-loudspeaker-arrays-2310.04202"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-model-based-beamforming-and-independent-steering-for-spherical-loudspeaker-arrays-2310.04202"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-on-the-influence-of-synchronization-error-on-fixed-filter-active-noise-control-2310.04249</loc><lastmod>2023-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-on-the-influence-of-synchronization-error-on-fixed-filter-active-noise-control-2310.04249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-on-the-influence-of-synchronization-error-on-fixed-filter-active-noise-control-2310.04249"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-and-linguistic-representations-for-speech-continuous-emotion-recognition-in-call-center-conversations-2310.04481</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-and-linguistic-representations-for-speech-continuous-emotion-recognition-in-call-center-conversations-2310.04481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-and-linguistic-representations-for-speech-continuous-emotion-recognition-in-call-center-conversations-2310.04481"/></url>
<url><loc>https://scifaro.com/en/abs/dpm-tse-a-diffusion-probabilistic-model-for-target-sound-extraction-2310.04567</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpm-tse-a-diffusion-probabilistic-model-for-target-sound-extraction-2310.04567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpm-tse-a-diffusion-probabilistic-model-for-target-sound-extraction-2310.04567"/></url>
<url><loc>https://scifaro.com/en/abs/spike-triggered-contextual-biasing-for-end-to-end-mandarin-speech-recognition-2310.04657</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spike-triggered-contextual-biasing-for-end-to-end-mandarin-speech-recognition-2310.04657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spike-triggered-contextual-biasing-for-end-to-end-mandarin-speech-recognition-2310.04657"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploration-of-task-decoupling-on-two-stage-neural-post-filter-for-real-time-personalized-acoustic-echo-cancellation-2310.04715</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploration-of-task-decoupling-on-two-stage-neural-post-filter-for-real-time-personalized-acoustic-echo-cancellation-2310.04715"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploration-of-task-decoupling-on-two-stage-neural-post-filter-for-real-time-personalized-acoustic-echo-cancellation-2310.04715"/></url>
<url><loc>https://scifaro.com/en/abs/multi-objective-progressive-clustering-for-semi-supervised-domain-adaptation-in-speaker-verification-2310.04760</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-objective-progressive-clustering-for-semi-supervised-domain-adaptation-in-speaker-verification-2310.04760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-objective-progressive-clustering-for-semi-supervised-domain-adaptation-in-speaker-verification-2310.04760"/></url>
<url><loc>https://scifaro.com/en/abs/conditional-diffusion-model-for-target-speaker-extraction-2310.04791</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conditional-diffusion-model-for-target-speaker-extraction-2310.04791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conditional-diffusion-model-for-target-speaker-extraction-2310.04791"/></url>
<url><loc>https://scifaro.com/en/abs/partial-rank-similarity-minimization-method-for-quality-mos-prediction-of-unseen-speech-synthesis-systems-in-zero-shot-and-semi-supervised-setting-2310.05078</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/partial-rank-similarity-minimization-method-for-quality-mos-prediction-of-unseen-speech-synthesis-systems-in-zero-shot-and-semi-supervised-setting-2310.05078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/partial-rank-similarity-minimization-method-for-quality-mos-prediction-of-unseen-speech-synthesis-systems-in-zero-shot-and-semi-supervised-setting-2310.05078"/></url>
<url><loc>https://scifaro.com/en/abs/unified-speech-and-gesture-synthesis-using-flow-matching-2310.05181</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-speech-and-gesture-synthesis-using-flow-matching-2310.05181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-speech-and-gesture-synthesis-using-flow-matching-2310.05181"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-of-voice-conversion-models-with-large-scale-speech-and-singing-data-the-t13-systems-for-the-singing-voice-conversion-challenge-2023-2310.05203</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-of-voice-conversion-models-with-large-scale-speech-and-singing-data-the-t13-systems-for-the-singing-voice-conversion-challenge-2023-2310.05203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-of-voice-conversion-models-with-large-scale-speech-and-singing-data-the-t13-systems-for-the-singing-voice-conversion-challenge-2023-2310.05203"/></url>
<url><loc>https://scifaro.com/en/abs/thech-report-genuinization-of-speech-waveform-pmf-for-speaker-detection-spoofing-and-countermeasures-2310.05534</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/thech-report-genuinization-of-speech-waveform-pmf-for-speaker-detection-spoofing-and-countermeasures-2310.05534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/thech-report-genuinization-of-speech-waveform-pmf-for-speaker-detection-spoofing-and-countermeasures-2310.05534"/></url>
<url><loc>https://scifaro.com/en/abs/super-denoise-net-speech-super-resolution-with-noise-cancellation-in-low-sampling-rate-noisy-environments-2310.05629</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/super-denoise-net-speech-super-resolution-with-noise-cancellation-in-low-sampling-rate-noisy-environments-2310.05629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/super-denoise-net-speech-super-resolution-with-noise-cancellation-in-low-sampling-rate-noisy-environments-2310.05629"/></url>
<url><loc>https://scifaro.com/en/abs/the-first-cadenza-signal-processing-challenge-improving-music-for-those-with-a-hearing-loss-2310.05799</loc><lastmod>2023-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-first-cadenza-signal-processing-challenge-improving-music-for-those-with-a-hearing-loss-2310.05799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-first-cadenza-signal-processing-challenge-improving-music-for-those-with-a-hearing-loss-2310.05799"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-audio-visual-joint-representations-for-multimodal-large-language-models-2310.05863</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-audio-visual-joint-representations-for-multimodal-large-language-models-2310.05863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-audio-visual-joint-representations-for-multimodal-large-language-models-2310.05863"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-speech-recognition-rescoring-with-pre-trained-language-models-2310.06248</loc><lastmod>2023-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-speech-recognition-rescoring-with-pre-trained-language-models-2310.06248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-speech-recognition-rescoring-with-pre-trained-language-models-2310.06248"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-of-speech-dependent-own-voice-transfer-characteristics-for-hearables-with-in-ear-microphones-2310.06554</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-of-speech-dependent-own-voice-transfer-characteristics-for-hearables-with-in-ear-microphones-2310.06554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-of-speech-dependent-own-voice-transfer-characteristics-for-hearables-with-in-ear-microphones-2310.06554"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-oriented-manipulation-of-speaker-representations-2310.06652</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-oriented-manipulation-of-speaker-representations-2310.06652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-oriented-manipulation-of-speaker-representations-2310.06652"/></url>
<url><loc>https://scifaro.com/en/abs/typing-to-listen-at-the-cocktail-party-text-guided-target-speaker-extraction-2310.07284</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/typing-to-listen-at-the-cocktail-party-text-guided-target-speaker-extraction-2310.07284"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/typing-to-listen-at-the-cocktail-party-text-guided-target-speaker-extraction-2310.07284"/></url>
<url><loc>https://scifaro.com/en/abs/vsanet-real-time-speech-enhancement-based-on-voice-activity-detection-and-causal-spatial-attention-2310.07295</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vsanet-real-time-speech-enhancement-based-on-voice-activity-detection-and-causal-spatial-attention-2310.07295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vsanet-real-time-speech-enhancement-based-on-voice-activity-detection-and-causal-spatial-attention-2310.07295"/></url>
<url><loc>https://scifaro.com/en/abs/magnitude-and-phase-aware-speech-enhancement-with-parallel-sequence-modeling-2310.07316</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/magnitude-and-phase-aware-speech-enhancement-with-parallel-sequence-modeling-2310.07316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/magnitude-and-phase-aware-speech-enhancement-with-parallel-sequence-modeling-2310.07316"/></url>
<url><loc>https://scifaro.com/en/abs/damping-density-of-an-absorptive-shoebox-room-derived-from-the-image-source-method-2310.07363</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/damping-density-of-an-absorptive-shoebox-room-derived-from-the-image-source-method-2310.07363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/damping-density-of-an-absorptive-shoebox-room-derived-from-the-image-source-method-2310.07363"/></url>
<url><loc>https://scifaro.com/en/abs/deep-video-inpainting-guided-by-audio-visual-self-supervision-2310.07663</loc><lastmod>2023-10-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-video-inpainting-guided-by-audio-visual-self-supervision-2310.07663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-video-inpainting-guided-by-audio-visual-self-supervision-2310.07663"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-for-stuttered-speech-instruments-unseen-languages-and-textually-described-voices-2310.08104</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-for-stuttered-speech-instruments-unseen-languages-and-textually-described-voices-2310.08104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-for-stuttered-speech-instruments-unseen-languages-and-textually-described-voices-2310.08104"/></url>
<url><loc>https://scifaro.com/en/abs/fast-word-error-rate-estimation-using-self-supervised-representations-for-speech-and-text-2310.08225</loc><lastmod>2025-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-word-error-rate-estimation-using-self-supervised-representations-for-speech-and-text-2310.08225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-word-error-rate-estimation-using-self-supervised-representations-for-speech-and-text-2310.08225"/></url>
<url><loc>https://scifaro.com/en/abs/a-single-speech-enhancement-model-unifying-dereverberation-denoising-speaker-counting-separation-and-extraction-2310.08277</loc><lastmod>2023-10-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-single-speech-enhancement-model-unifying-dereverberation-denoising-speaker-counting-separation-and-extraction-2310.08277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-single-speech-enhancement-model-unifying-dereverberation-denoising-speaker-counting-separation-and-extraction-2310.08277"/></url>
<url><loc>https://scifaro.com/en/abs/a-cry-for-help-early-detection-of-brain-injury-in-newborns-2310.08338</loc><lastmod>2023-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cry-for-help-early-detection-of-brain-injury-in-newborns-2310.08338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cry-for-help-early-detection-of-brain-injury-in-newborns-2310.08338"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourced-and-automatic-speech-prominence-estimation-2310.08464</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourced-and-automatic-speech-prominence-estimation-2310.08464"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourced-and-automatic-speech-prominence-estimation-2310.08464"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-online-speaker-diarization-with-target-speaker-tracking-2310.08696</loc><lastmod>2023-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-online-speaker-diarization-with-target-speaker-tracking-2310.08696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-online-speaker-diarization-with-target-speaker-tracking-2310.08696"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-rate-attention-based-duration-prediction-for-speed-control-tts-2310.08846</loc><lastmod>2023-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-rate-attention-based-duration-prediction-for-speed-control-tts-2310.08846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-rate-attention-based-duration-prediction-for-speed-control-tts-2310.08846"/></url>
<url><loc>https://scifaro.com/en/abs/corn-co-trained-full-and-no-reference-speech-quality-assessment-2310.09388</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/corn-co-trained-full-and-no-reference-speech-quality-assessment-2310.09388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/corn-co-trained-full-and-no-reference-speech-quality-assessment-2310.09388"/></url>
<url><loc>https://scifaro.com/en/abs/protecting-voice-controlled-devices-against-laser-injection-attacks-2310.09404</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/protecting-voice-controlled-devices-against-laser-injection-attacks-2310.09404"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/protecting-voice-controlled-devices-against-laser-injection-attacks-2310.09404"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-speech-enhancement-and-separation-with-a-unified-deep-neural-network-for-single-dual-talker-scenarios-2310.10026</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-speech-enhancement-and-separation-with-a-unified-deep-neural-network-for-single-dual-talker-scenarios-2310.10026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-speech-enhancement-and-separation-with-a-unified-deep-neural-network-for-single-dual-talker-scenarios-2310.10026"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-audio-emotion-and-intent-recognition-with-large-pre-trained-models-and-bayesian-inference-2310.10179</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-audio-emotion-and-intent-recognition-with-large-pre-trained-models-and-bayesian-inference-2310.10179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-audio-emotion-and-intent-recognition-with-large-pre-trained-models-and-bayesian-inference-2310.10179"/></url>
<url><loc>https://scifaro.com/en/abs/generation-or-replication-auscultating-audio-latent-diffusion-models-2310.10604</loc><lastmod>2023-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generation-or-replication-auscultating-audio-latent-diffusion-models-2310.10604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generation-or-replication-auscultating-audio-latent-diffusion-models-2310.10604"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-models-of-speech-infer-universal-articulatory-kinematics-2310.10788</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-models-of-speech-infer-universal-articulatory-kinematics-2310.10788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-models-of-speech-infer-universal-articulatory-kinematics-2310.10788"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-accent-dialect-identification-and-accentedness-assessment-with-multi-embedding-models-and-automatic-speech-recognition-2310.11004</loc><lastmod>2023-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-accent-dialect-identification-and-accentedness-assessment-with-multi-embedding-models-and-automatic-speech-recognition-2310.11004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-accent-dialect-identification-and-accentedness-assessment-with-multi-embedding-models-and-automatic-speech-recognition-2310.11004"/></url>
<url><loc>https://scifaro.com/en/abs/iterative-shallow-fusion-of-backward-language-model-for-end-to-end-speech-recognition-2310.11010</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iterative-shallow-fusion-of-backward-language-model-for-end-to-end-speech-recognition-2310.11010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iterative-shallow-fusion-of-backward-language-model-for-end-to-end-speech-recognition-2310.11010"/></url>
<url><loc>https://scifaro.com/en/abs/zipformer-a-faster-and-better-encoder-for-automatic-speech-recognition-2310.11230</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zipformer-a-faster-and-better-encoder-for-automatic-speech-recognition-2310.11230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zipformer-a-faster-and-better-encoder-for-automatic-speech-recognition-2310.11230"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-real-time-tracking-of-children-s-reading-with-pointer-network-2310.11486</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-real-time-tracking-of-children-s-reading-with-pointer-network-2310.11486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-real-time-tracking-of-children-s-reading-with-pointer-network-2310.11486"/></url>
<url><loc>https://scifaro.com/en/abs/experimental-results-of-underwater-sound-speed-profile-inversion-by-few-shot-multi-task-learning-2310.11708</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/experimental-results-of-underwater-sound-speed-profile-inversion-by-few-shot-multi-task-learning-2310.11708"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/experimental-results-of-underwater-sound-speed-profile-inversion-by-few-shot-multi-task-learning-2310.11708"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-spoofing-speech-detection-using-rhythm-information-2310.12014</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-spoofing-speech-detection-using-rhythm-information-2310.12014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-spoofing-speech-detection-using-rhythm-information-2310.12014"/></url>
<url><loc>https://scifaro.com/en/abs/dasa-difficulty-aware-semantic-augmentation-for-speaker-verification-2310.12111</loc><lastmod>2023-10-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dasa-difficulty-aware-semantic-augmentation-for-speaker-verification-2310.12111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dasa-difficulty-aware-semantic-augmentation-for-speaker-verification-2310.12111"/></url>
<url><loc>https://scifaro.com/en/abs/property-aware-multi-speaker-data-simulation-a-probabilistic-modelling-technique-for-synthetic-data-generation-2310.12371</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/property-aware-multi-speaker-data-simulation-a-probabilistic-modelling-technique-for-synthetic-data-generation-2310.12371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/property-aware-multi-speaker-data-simulation-a-probabilistic-modelling-technique-for-synthetic-data-generation-2310.12371"/></url>
<url><loc>https://scifaro.com/en/abs/the-chime-7-challenge-system-description-and-performance-of-nemo-team-s-dasr-system-2310.12378</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-chime-7-challenge-system-description-and-performance-of-nemo-team-s-dasr-system-2310.12378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-chime-7-challenge-system-description-and-performance-of-nemo-team-s-dasr-system-2310.12378"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-in-context-learning-of-textless-speech-language-model-for-speech-classification-tasks-2310.12477</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-in-context-learning-of-textless-speech-language-model-for-speech-classification-tasks-2310.12477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-in-context-learning-of-textless-speech-language-model-for-speech-classification-tasks-2310.12477"/></url>
<url><loc>https://scifaro.com/en/abs/on-feature-importance-and-interpretability-of-speaker-representations-2310.12599</loc><lastmod>2023-10-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-feature-importance-and-interpretability-of-speaker-representations-2310.12599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-feature-importance-and-interpretability-of-speaker-representations-2310.12599"/></url>
<url><loc>https://scifaro.com/en/abs/deep-beamforming-for-speech-enhancement-and-speaker-localization-with-an-array-response-aware-loss-function-2310.12837</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-beamforming-for-speech-enhancement-and-speaker-localization-with-an-array-response-aware-loss-function-2310.12837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-beamforming-for-speech-enhancement-and-speaker-localization-with-an-array-response-aware-loss-function-2310.12837"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-speech-abnormalities-with-a-perceiver-based-sequence-classifier-that-leverages-a-universal-speech-model-2310.13010</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-speech-abnormalities-with-a-perceiver-based-sequence-classifier-that-leverages-a-universal-speech-model-2310.13010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-speech-abnormalities-with-a-perceiver-based-sequence-classifier-that-leverages-a-universal-speech-model-2310.13010"/></url>
<url><loc>https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-generative-models-2310.13418</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-generative-models-2310.13418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-generative-models-2310.13418"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-interpolation-using-a-spherical-neural-process-meta-learner-2310.13430</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-interpolation-using-a-spherical-neural-process-meta-learner-2310.13430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-interpolation-using-a-spherical-neural-process-meta-learner-2310.13430"/></url>
<url><loc>https://scifaro.com/en/abs/neural-domain-alignment-for-spoken-language-recognition-based-on-optimal-transport-2310.13471</loc><lastmod>2023-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-domain-alignment-for-spoken-language-recognition-based-on-optimal-transport-2310.13471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-domain-alignment-for-spoken-language-recognition-based-on-optimal-transport-2310.13471"/></url>
<url><loc>https://scifaro.com/en/abs/swg-former-a-sliding-window-graph-convolutional-network-for-simultaneous-spatial-temporal-information-extraction-in-sound-event-localization-and-detection-2310.14016</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/swg-former-a-sliding-window-graph-convolutional-network-for-simultaneous-spatial-temporal-information-extraction-in-sound-event-localization-and-detection-2310.14016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/swg-former-a-sliding-window-graph-convolutional-network-for-simultaneous-spatial-temporal-information-extraction-in-sound-event-localization-and-detection-2310.14016"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-intrapersonal-and-interpersonal-influences-for-automatic-estimation-of-therapist-empathy-in-counseling-conversation-2310.14178</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-intrapersonal-and-interpersonal-influences-for-automatic-estimation-of-therapist-empathy-in-counseling-conversation-2310.14178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-intrapersonal-and-interpersonal-influences-for-automatic-estimation-of-therapist-empathy-in-counseling-conversation-2310.14178"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-prosodic-entrainment-in-relation-to-therapist-empathy-in-counseling-conversation-2310.14181</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-prosodic-entrainment-in-relation-to-therapist-empathy-in-counseling-conversation-2310.14181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-prosodic-entrainment-in-relation-to-therapist-empathy-in-counseling-conversation-2310.14181"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-adversarial-purification-for-speaker-verification-2310.14270</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-adversarial-purification-for-speaker-verification-2310.14270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-adversarial-purification-for-speaker-verification-2310.14270"/></url>
<url><loc>https://scifaro.com/en/abs/mfcc-gan-codec-a-new-ai-based-audio-coding-2310.14300</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mfcc-gan-codec-a-new-ai-based-audio-coding-2310.14300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mfcc-gan-codec-a-new-ai-based-audio-coding-2310.14300"/></url>
<url><loc>https://scifaro.com/en/abs/an-overview-of-text-to-speech-systems-and-media-applications-2310.14301</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-overview-of-text-to-speech-systems-and-media-applications-2310.14301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-overview-of-text-to-speech-systems-and-media-applications-2310.14301"/></url>
<url><loc>https://scifaro.com/en/abs/dpp-tts-diversifying-prosodic-features-of-speech-via-determinantal-point-processes-2310.14663</loc><lastmod>2023-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dpp-tts-diversifying-prosodic-features-of-speech-via-determinantal-point-processes-2310.14663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dpp-tts-diversifying-prosodic-features-of-speech-via-determinantal-point-processes-2310.14663"/></url>
<url><loc>https://scifaro.com/en/abs/prompt-driven-target-speech-diarization-2310.14823</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompt-driven-target-speech-diarization-2310.14823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompt-driven-target-speech-diarization-2310.14823"/></url>
<url><loc>https://scifaro.com/en/abs/gesi-gammachirp-envelope-similarity-index-for-predicting-intelligibility-of-simulated-hearing-loss-sounds-2310.15399</loc><lastmod>2024-03-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gesi-gammachirp-envelope-similarity-index-for-predicting-intelligibility-of-simulated-hearing-loss-sounds-2310.15399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gesi-gammachirp-envelope-similarity-index-for-predicting-intelligibility-of-simulated-hearing-loss-sounds-2310.15399"/></url>
<url><loc>https://scifaro.com/en/abs/the-mason-alberta-phonetic-segmenter-a-forced-alignment-system-based-on-deep-neural-networks-and-interpolation-2310.15425</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-mason-alberta-phonetic-segmenter-a-forced-alignment-system-based-on-deep-neural-networks-and-interpolation-2310.15425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-mason-alberta-phonetic-segmenter-a-forced-alignment-system-based-on-deep-neural-networks-and-interpolation-2310.15425"/></url>
<url><loc>https://scifaro.com/en/abs/foley-vae-generaci-on-de-efectos-de-audio-para-cine-con-inteligencia-artificial-2310.15663</loc><lastmod>2023-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/foley-vae-generaci-on-de-efectos-de-audio-para-cine-con-inteligencia-artificial-2310.15663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/foley-vae-generaci-on-de-efectos-de-audio-para-cine-con-inteligencia-artificial-2310.15663"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-music-classification-models-via-music-source-separation-2310.15845</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-music-classification-models-via-music-source-separation-2310.15845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-music-classification-models-via-music-source-separation-2310.15845"/></url>
<url><loc>https://scifaro.com/en/abs/ia-para-el-mantenimiento-predictivo-en-canteras-modelado-2310.16140</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ia-para-el-mantenimiento-predictivo-en-canteras-modelado-2310.16140"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ia-para-el-mantenimiento-predictivo-en-canteras-modelado-2310.16140"/></url>
<url><loc>https://scifaro.com/en/abs/covariance-blocking-and-whitening-method-for-successive-relative-transfer-function-vector-estimation-in-multi-speaker-scenarios-2310.16327</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covariance-blocking-and-whitening-method-for-successive-relative-transfer-function-vector-estimation-in-multi-speaker-scenarios-2310.16327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covariance-blocking-and-whitening-method-for-successive-relative-transfer-function-vector-estimation-in-multi-speaker-scenarios-2310.16327"/></url>
<url><loc>https://scifaro.com/en/abs/generative-pre-training-for-speech-with-flow-matching-2310.16338</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-pre-training-for-speech-with-flow-matching-2310.16338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-pre-training-for-speech-with-flow-matching-2310.16338"/></url>
<url><loc>https://scifaro.com/en/abs/unix-encoder-a-universal-x-channel-speech-encoder-for-ad-hoc-microphone-array-speech-processing-2310.16367</loc><lastmod>2023-10-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unix-encoder-a-universal-x-channel-speech-encoder-for-ad-hoc-microphone-array-speech-processing-2310.16367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unix-encoder-a-universal-x-channel-speech-encoder-for-ad-hoc-microphone-array-speech-processing-2310.16367"/></url>
<url><loc>https://scifaro.com/en/abs/a-novel-approach-for-object-based-audio-broadcasting-2310.16481</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-novel-approach-for-object-based-audio-broadcasting-2310.16481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-novel-approach-for-object-based-audio-broadcasting-2310.16481"/></url>
<url><loc>https://scifaro.com/en/abs/improved-panning-on-non-equidistant-loudspeakers-with-direct-sound-level-compensation-2310.17004</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-panning-on-non-equidistant-loudspeakers-with-direct-sound-level-compensation-2310.17004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-panning-on-non-equidistant-loudspeakers-with-direct-sound-level-compensation-2310.17004"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-multi-speaker-expressive-speech-synthesis-with-semi-supervised-contrastive-learning-2310.17101</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-multi-speaker-expressive-speech-synthesis-with-semi-supervised-contrastive-learning-2310.17101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-multi-speaker-expressive-speech-synthesis-with-semi-supervised-contrastive-learning-2310.17101"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-neonatal-chest-sound-separation-using-deep-learning-2310.17116</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-neonatal-chest-sound-separation-using-deep-learning-2310.17116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-neonatal-chest-sound-separation-using-deep-learning-2310.17116"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-by-colored-spectrograms-2310.17142</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-by-colored-spectrograms-2310.17142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-by-colored-spectrograms-2310.17142"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-preserving-representation-learning-for-speech-understanding-2310.17194</loc><lastmod>2023-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-preserving-representation-learning-for-speech-understanding-2310.17194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-preserving-representation-learning-for-speech-understanding-2310.17194"/></url>
<url><loc>https://scifaro.com/en/abs/music-recommendation-based-on-audio-fingerprint-2310.17655</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-recommendation-based-on-audio-fingerprint-2310.17655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-recommendation-based-on-audio-fingerprint-2310.17655"/></url>
<url><loc>https://scifaro.com/en/abs/early-detection-of-tuberculosis-with-machine-learning-cough-audio-analysis-towards-more-accessible-global-triaging-usage-2310.17675</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/early-detection-of-tuberculosis-with-machine-learning-cough-audio-analysis-towards-more-accessible-global-triaging-usage-2310.17675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/early-detection-of-tuberculosis-with-machine-learning-cough-audio-analysis-towards-more-accessible-global-triaging-usage-2310.17675"/></url>
<url><loc>https://scifaro.com/en/abs/bert-pin-a-bert-based-framework-for-recovering-missing-data-segments-in-time-series-load-profiles-2310.17742</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bert-pin-a-bert-based-framework-for-recovering-missing-data-segments-in-time-series-load-profiles-2310.17742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bert-pin-a-bert-based-framework-for-recovering-missing-data-segments-in-time-series-load-profiles-2310.17742"/></url>
<url><loc>https://scifaro.com/en/abs/torchaudio-2-1-advancing-speech-recognition-self-supervised-learning-and-audio-processing-components-for-pytorch-2310.17864</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/torchaudio-2-1-advancing-speech-recognition-self-supervised-learning-and-audio-processing-components-for-pytorch-2310.17864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/torchaudio-2-1-advancing-speech-recognition-self-supervised-learning-and-audio-processing-components-for-pytorch-2310.17864"/></url>
<url><loc>https://scifaro.com/en/abs/relative-transfer-function-vector-estimation-for-acoustic-sensor-networks-exploiting-covariance-matrix-structure-2310.18199</loc><lastmod>2023-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relative-transfer-function-vector-estimation-for-acoustic-sensor-networks-exploiting-covariance-matrix-structure-2310.18199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relative-transfer-function-vector-estimation-for-acoustic-sensor-networks-exploiting-covariance-matrix-structure-2310.18199"/></url>
<url><loc>https://scifaro.com/en/abs/mixrep-hidden-representation-mixup-for-low-resource-speech-recognition-2310.18450</loc><lastmod>2025-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixrep-hidden-representation-mixup-for-low-resource-speech-recognition-2310.18450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixrep-hidden-representation-mixup-for-low-resource-speech-recognition-2310.18450"/></url>
<url><loc>https://scifaro.com/en/abs/improved-lossless-coding-for-storage-and-transmission-of-multichannel-immersive-audio-2310.18461</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-lossless-coding-for-storage-and-transmission-of-multichannel-immersive-audio-2310.18461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-lossless-coding-for-storage-and-transmission-of-multichannel-immersive-audio-2310.18461"/></url>
<url><loc>https://scifaro.com/en/abs/seeing-through-the-conversation-audio-visual-speech-separation-based-on-diffusion-model-2310.19581</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seeing-through-the-conversation-audio-visual-speech-separation-based-on-diffusion-model-2310.19581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seeing-through-the-conversation-audio-visual-speech-separation-based-on-diffusion-model-2310.19581"/></url>
<url><loc>https://scifaro.com/en/abs/scenario-aware-audio-visual-tf-gridnet-for-target-speech-extraction-2310.19644</loc><lastmod>2023-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scenario-aware-audio-visual-tf-gridnet-for-target-speech-extraction-2310.19644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scenario-aware-audio-visual-tf-gridnet-for-target-speech-extraction-2310.19644"/></url>
<url><loc>https://scifaro.com/en/abs/intelligibility-prediction-with-a-pretrained-noise-robust-automatic-speech-recognition-model-2310.19817</loc><lastmod>2023-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intelligibility-prediction-with-a-pretrained-noise-robust-automatic-speech-recognition-model-2310.19817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intelligibility-prediction-with-a-pretrained-noise-robust-automatic-speech-recognition-model-2310.19817"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-speaker-localization-with-binaural-microphone-array-incorporating-auditory-filters-and-lateral-angle-estimation-2310.20238</loc><lastmod>2023-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-speaker-localization-with-binaural-microphone-array-incorporating-auditory-filters-and-lateral-angle-estimation-2310.20238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-speaker-localization-with-binaural-microphone-array-incorporating-auditory-filters-and-lateral-angle-estimation-2310.20238"/></url>
<url><loc>https://scifaro.com/en/abs/rir-sf-room-impulse-response-based-spatial-feature-for-target-speech-recognition-in-multi-channel-multi-speaker-scenarios-2311.00146</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rir-sf-room-impulse-response-based-spatial-feature-for-target-speech-recognition-in-multi-channel-multi-speaker-scenarios-2311.00146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rir-sf-room-impulse-response-based-spatial-feature-for-target-speech-recognition-in-multi-channel-multi-speaker-scenarios-2311.00146"/></url>
<url><loc>https://scifaro.com/en/abs/c2c-cough-to-covid-19-detection-in-bhi-2023-data-challenge-2311.00364</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/c2c-cough-to-covid-19-detection-in-bhi-2023-data-challenge-2311.00364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/c2c-cough-to-covid-19-detection-in-bhi-2023-data-challenge-2311.00364"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-large-speech-models-based-representations-for-speech-emotion-recognition-2311.00394</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-large-speech-models-based-representations-for-speech-emotion-recognition-2311.00394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-large-speech-models-based-representations-for-speech-emotion-recognition-2311.00394"/></url>
<url><loc>https://scifaro.com/en/abs/reverberant-sound-field-equalisation-for-an-enhanced-stereo-playback-experience-2311.00624</loc><lastmod>2023-11-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberant-sound-field-equalisation-for-an-enhanced-stereo-playback-experience-2311.00624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberant-sound-field-equalisation-for-an-enhanced-stereo-playback-experience-2311.00624"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-disfluency-detection-from-untranscribed-speech-2311.00867</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-disfluency-detection-from-untranscribed-speech-2311.00867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-disfluency-detection-from-untranscribed-speech-2311.00867"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-tts-driven-by-natural-language-prompts-using-few-human-annotations-2311.01260</loc><lastmod>2023-11-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-tts-driven-by-natural-language-prompts-using-few-human-annotations-2311.01260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-tts-driven-by-natural-language-prompts-using-few-human-annotations-2311.01260"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-frechet-audio-distance-for-generative-music-evaluation-2311.01616</loc><lastmod>2024-03-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-frechet-audio-distance-for-generative-music-evaluation-2311.01616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-frechet-audio-distance-for-generative-music-evaluation-2311.01616"/></url>
<url><loc>https://scifaro.com/en/abs/se-territory-monaural-speech-enhancement-meets-the-fixed-virtual-perceptual-space-mapping-2311.01679</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/se-territory-monaural-speech-enhancement-meets-the-fixed-virtual-perceptual-space-mapping-2311.01679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/se-territory-monaural-speech-enhancement-meets-the-fixed-virtual-perceptual-space-mapping-2311.01679"/></url>
<url><loc>https://scifaro.com/en/abs/transduce-and-speak-neural-transducer-for-text-to-speech-with-semantic-token-prediction-2311.02898</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transduce-and-speak-neural-transducer-for-text-to-speech-with-semantic-token-prediction-2311.02898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transduce-and-speak-neural-transducer-for-text-to-speech-with-semantic-token-prediction-2311.02898"/></url>
<url><loc>https://scifaro.com/en/abs/learning-disentangled-speech-representations-2311.03389</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-disentangled-speech-representations-2311.03389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-disentangled-speech-representations-2311.03389"/></url>
<url><loc>https://scifaro.com/en/abs/personalizing-keyword-spotting-with-speaker-information-2311.03419</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalizing-keyword-spotting-with-speaker-information-2311.03419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalizing-keyword-spotting-with-speaker-information-2311.03419"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-estimation-in-the-wild-2311.03560</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-estimation-in-the-wild-2311.03560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-estimation-in-the-wild-2311.03560"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-convergence-model-in-bengali-speech-recognition-2311.04122</loc><lastmod>2023-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-convergence-model-in-bengali-speech-recognition-2311.04122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-convergence-model-in-bengali-speech-recognition-2311.04122"/></url>
<url><loc>https://scifaro.com/en/abs/instrumentgen-generating-sample-based-musical-instruments-from-text-2311.04339</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instrumentgen-generating-sample-based-musical-instruments-from-text-2311.04339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instrumentgen-generating-sample-based-musical-instruments-from-text-2311.04339"/></url>
<url><loc>https://scifaro.com/en/abs/selective-hubert-self-supervised-pre-training-for-target-speaker-in-clean-and-mixture-speech-2311.04526</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selective-hubert-self-supervised-pre-training-for-target-speaker-in-clean-and-mixture-speech-2311.04526"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selective-hubert-self-supervised-pre-training-for-target-speaker-in-clean-and-mixture-speech-2311.04526"/></url>
<url><loc>https://scifaro.com/en/abs/diff-hiervc-diffusion-based-hierarchical-voice-conversion-with-robust-pitch-generation-and-masked-prior-for-zero-shot-speaker-adaptation-2311.04693</loc><lastmod>2023-11-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-hiervc-diffusion-based-hierarchical-voice-conversion-with-robust-pitch-generation-and-masked-prior-for-zero-shot-speaker-adaptation-2311.04693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-hiervc-diffusion-based-hierarchical-voice-conversion-with-robust-pitch-generation-and-masked-prior-for-zero-shot-speaker-adaptation-2311.04693"/></url>
<url><loc>https://scifaro.com/en/abs/1spu-1-step-speech-processing-unit-2311.04753</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1spu-1-step-speech-processing-unit-2311.04753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1spu-1-step-speech-processing-unit-2311.04753"/></url>
<url><loc>https://scifaro.com/en/abs/gpu-accelerated-wfst-beam-search-decoder-for-ctc-based-speech-recognition-2311.04996</loc><lastmod>2023-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gpu-accelerated-wfst-beam-search-decoder-for-ctc-based-speech-recognition-2311.04996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gpu-accelerated-wfst-beam-search-decoder-for-ctc-based-speech-recognition-2311.04996"/></url>
<url><loc>https://scifaro.com/en/abs/improving-whispered-speech-recognition-performance-using-pseudo-whispered-based-data-augmentation-2311.05179</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-whispered-speech-recognition-performance-using-pseudo-whispered-based-data-augmentation-2311.05179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-whispered-speech-recognition-performance-using-pseudo-whispered-based-data-augmentation-2311.05179"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-reconstruction-using-neural-processes-with-dynamic-kernels-2311.05188</loc><lastmod>2023-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-reconstruction-using-neural-processes-with-dynamic-kernels-2311.05188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-reconstruction-using-neural-processes-with-dynamic-kernels-2311.05188"/></url>
<url><loc>https://scifaro.com/en/abs/phonological-level-wav2vec2-based-mispronunciation-detection-and-diagnosis-method-2311.07037</loc><lastmod>2023-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonological-level-wav2vec2-based-mispronunciation-detection-and-diagnosis-method-2311.07037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonological-level-wav2vec2-based-mispronunciation-detection-and-diagnosis-method-2311.07037"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-duet-singing-voices-separation-with-diffusion-models-2311.07345</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-duet-singing-voices-separation-with-diffusion-models-2311.07345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-duet-singing-voices-separation-with-diffusion-models-2311.07345"/></url>
<url><loc>https://scifaro.com/en/abs/qwen-audio-advancing-universal-audio-understanding-via-unified-large-scale-audio-language-models-2311.07919</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qwen-audio-advancing-universal-audio-understanding-via-unified-large-scale-audio-language-models-2311.07919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qwen-audio-advancing-universal-audio-understanding-via-unified-large-scale-audio-language-models-2311.07919"/></url>
<url><loc>https://scifaro.com/en/abs/generative-de-quantization-for-neural-speech-codec-via-latent-diffusion-2311.08330</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-de-quantization-for-neural-speech-codec-via-latent-diffusion-2311.08330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-de-quantization-for-neural-speech-codec-via-latent-diffusion-2311.08330"/></url>
<url><loc>https://scifaro.com/en/abs/mustango-toward-controllable-text-to-music-generation-2311.08355</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mustango-toward-controllable-text-to-music-generation-2311.08355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mustango-toward-controllable-text-to-music-generation-2311.08355"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-captioning-with-audio-language-model-guidance-and-audio-context-keywords-2311.08396</loc><lastmod>2023-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-with-audio-language-model-guidance-and-audio-context-keywords-2311.08396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-captioning-with-audio-language-model-guidance-and-audio-context-keywords-2311.08396"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-conversational-speaker-separation-via-neural-diarization-2311.08630</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-conversational-speaker-separation-via-neural-diarization-2311.08630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-conversational-speaker-separation-via-neural-diarization-2311.08630"/></url>
<url><loc>https://scifaro.com/en/abs/multi-objective-non-intrusive-hearing-aid-speech-assessment-model-2311.08878</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-objective-non-intrusive-hearing-aid-speech-assessment-model-2311.08878"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-objective-non-intrusive-hearing-aid-speech-assessment-model-2311.08878"/></url>
<url><loc>https://scifaro.com/en/abs/ai-based-soundscape-analysis-jointly-identifying-sound-sources-and-predicting-annoyance-2311.09030</loc><lastmod>2023-11-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-based-soundscape-analysis-jointly-identifying-sound-sources-and-predicting-annoyance-2311.09030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-based-soundscape-analysis-jointly-identifying-sound-sources-and-predicting-annoyance-2311.09030"/></url>
<url><loc>https://scifaro.com/en/abs/improving-fairness-for-spoken-language-understanding-in-atypical-speech-with-text-to-speech-2311.10149</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-fairness-for-spoken-language-understanding-in-atypical-speech-with-text-to-speech-2311.10149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-fairness-for-spoken-language-understanding-in-atypical-speech-with-text-to-speech-2311.10149"/></url>
<url><loc>https://scifaro.com/en/abs/le-ssl-mos-self-supervised-learning-mos-prediction-with-listener-enhancement-2311.10656</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/le-ssl-mos-self-supervised-learning-mos-prediction-with-listener-enhancement-2311.10656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/le-ssl-mos-self-supervised-learning-mos-prediction-with-listener-enhancement-2311.10656"/></url>
<url><loc>https://scifaro.com/en/abs/reprogramming-self-supervised-learning-based-speech-representations-for-speaker-anonymization-2311.10664</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reprogramming-self-supervised-learning-based-speech-representations-for-speaker-anonymization-2311.10664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reprogramming-self-supervised-learning-based-speech-representations-for-speaker-anonymization-2311.10664"/></url>
<url><loc>https://scifaro.com/en/abs/ghostvec-a-new-threat-to-speaker-privacy-of-end-to-end-speech-recognition-system-2311.10689</loc><lastmod>2023-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ghostvec-a-new-threat-to-speaker-privacy-of-end-to-end-speech-recognition-system-2311.10689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ghostvec-a-new-threat-to-speaker-privacy-of-end-to-end-speech-recognition-system-2311.10689"/></url>
<url><loc>https://scifaro.com/en/abs/mspb-a-longitudinal-multi-sensor-dataset-with-phenotypic-trait-measurements-from-honey-bees-2311.10876</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mspb-a-longitudinal-multi-sensor-dataset-with-phenotypic-trait-measurements-from-honey-bees-2311.10876"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mspb-a-longitudinal-multi-sensor-dataset-with-phenotypic-trait-measurements-from-honey-bees-2311.10876"/></url>
<url><loc>https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-adaptable-online-e2e-speech-recognition-2311.11353</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-adaptable-online-e2e-speech-recognition-2311.11353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-synchronous-neural-transducer-for-adaptable-online-e2e-speech-recognition-2311.11353"/></url>
<url><loc>https://scifaro.com/en/abs/apnet2-high-quality-and-high-efficiency-neural-vocoder-with-direct-prediction-of-amplitude-and-phase-spectra-2311.11545</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apnet2-high-quality-and-high-efficiency-neural-vocoder-with-direct-prediction-of-amplitude-and-phase-spectra-2311.11545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apnet2-high-quality-and-high-efficiency-neural-vocoder-with-direct-prediction-of-amplitude-and-phase-spectra-2311.11545"/></url>
<url><loc>https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimation-with-virtual-microphone-and-beamformer-level-multi-task-loss-2311.11595</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimation-with-virtual-microphone-and-beamformer-level-multi-task-loss-2311.11595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-network-based-virtual-microphone-estimation-with-virtual-microphone-and-beamformer-level-multi-task-loss-2311.11595"/></url>
<url><loc>https://scifaro.com/en/abs/how-does-end-to-end-speech-recognition-training-impact-speech-enhancement-artifacts-2311.11599</loc><lastmod>2023-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-does-end-to-end-speech-recognition-training-impact-speech-enhancement-artifacts-2311.11599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-does-end-to-end-speech-recognition-training-impact-speech-enhancement-artifacts-2311.11599"/></url>
<url><loc>https://scifaro.com/en/abs/eliminating-quantization-errors-in-classification-based-sound-source-localization-2311.12305</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eliminating-quantization-errors-in-classification-based-sound-source-localization-2311.12305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eliminating-quantization-errors-in-classification-based-sound-source-localization-2311.12305"/></url>
<url><loc>https://scifaro.com/en/abs/audiolog-llms-powered-long-audio-logging-with-hybrid-token-semantic-contrastive-learning-2311.12371</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiolog-llms-powered-long-audio-logging-with-hybrid-token-semantic-contrastive-learning-2311.12371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiolog-llms-powered-long-audio-logging-with-hybrid-token-semantic-contrastive-learning-2311.12371"/></url>
<url><loc>https://scifaro.com/en/abs/a-distributed-algorithm-for-personal-sound-zones-systems-2311.12427</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-distributed-algorithm-for-personal-sound-zones-systems-2311.12427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-distributed-algorithm-for-personal-sound-zones-systems-2311.12427"/></url>
<url><loc>https://scifaro.com/en/abs/hpcneuronet-advancing-neuromorphic-audio-signal-processing-with-transformer-enhanced-spiking-neural-networks-2311.12449</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hpcneuronet-advancing-neuromorphic-audio-signal-processing-with-transformer-enhanced-spiking-neural-networks-2311.12449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hpcneuronet-advancing-neuromorphic-audio-signal-processing-with-transformer-enhanced-spiking-neural-networks-2311.12449"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-pretrained-speech-model-for-mandarin-lyrics-transcription-and-alignment-2311.12488</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-pretrained-speech-model-for-mandarin-lyrics-transcription-and-alignment-2311.12488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-pretrained-speech-model-for-mandarin-lyrics-transcription-and-alignment-2311.12488"/></url>
<url><loc>https://scifaro.com/en/abs/summary-of-the-displace-challenge-2023-diarization-of-speaker-and-language-in-conversational-environments-2311.12564</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/summary-of-the-displace-challenge-2023-diarization-of-speaker-and-language-in-conversational-environments-2311.12564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/summary-of-the-displace-challenge-2023-diarization-of-speaker-and-language-in-conversational-environments-2311.12564"/></url>
<url><loc>https://scifaro.com/en/abs/learning-based-array-configuration-independent-binaural-audio-telepresence-with-scalable-signal-enhancement-and-ambience-preservation-2311.12706</loc><lastmod>2023-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-based-array-configuration-independent-binaural-audio-telepresence-with-scalable-signal-enhancement-and-ambience-preservation-2311.12706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-based-array-configuration-independent-binaural-audio-telepresence-with-scalable-signal-enhancement-and-ambience-preservation-2311.12706"/></url>
<url><loc>https://scifaro.com/en/abs/fedcpc-an-effective-federated-contrastive-learning-method-for-privacy-preserving-early-stage-alzheimer-s-speech-detection-2311.13043</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fedcpc-an-effective-federated-contrastive-learning-method-for-privacy-preserving-early-stage-alzheimer-s-speech-detection-2311.13043"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fedcpc-an-effective-federated-contrastive-learning-method-for-privacy-preserving-early-stage-alzheimer-s-speech-detection-2311.13043"/></url>
<url><loc>https://scifaro.com/en/abs/deep-audio-zooming-beamwidth-controllable-neural-beamformer-2311.13075</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-audio-zooming-beamwidth-controllable-neural-beamformer-2311.13075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-audio-zooming-beamwidth-controllable-neural-beamformer-2311.13075"/></url>
<url><loc>https://scifaro.com/en/abs/performance-analysis-of-binaural-signal-matching-bsm-in-the-time-frequency-domain-2311.13390</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-analysis-of-binaural-signal-matching-bsm-in-the-time-frequency-domain-2311.13390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-analysis-of-binaural-signal-matching-bsm-in-the-time-frequency-domain-2311.13390"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-audio-and-individualized-hrtfs-using-a-convolutional-neural-network-cnn-2311.13397</loc><lastmod>2023-11-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-audio-and-individualized-hrtfs-using-a-convolutional-neural-network-cnn-2311.13397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-audio-and-individualized-hrtfs-using-a-convolutional-neural-network-cnn-2311.13397"/></url>
<url><loc>https://scifaro.com/en/abs/sparsity-driven-eeg-channel-selection-for-brain-assisted-speech-enhancement-2311.13436</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparsity-driven-eeg-channel-selection-for-brain-assisted-speech-enhancement-2311.13436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparsity-driven-eeg-channel-selection-for-brain-assisted-speech-enhancement-2311.13436"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-transfer-learning-for-speaker-independent-cross-language-and-cross-corpus-speech-emotion-recognition-2311.13678</loc><lastmod>2025-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-transfer-learning-for-speaker-independent-cross-language-and-cross-corpus-speech-emotion-recognition-2311.13678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-transfer-learning-for-speaker-independent-cross-language-and-cross-corpus-speech-emotion-recognition-2311.13678"/></url>
<url><loc>https://scifaro.com/en/abs/jam-alt-a-formatting-aware-lyrics-transcription-benchmark-2311.13987</loc><lastmod>2023-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jam-alt-a-formatting-aware-lyrics-transcription-benchmark-2311.13987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jam-alt-a-formatting-aware-lyrics-transcription-benchmark-2311.13987"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-dual-conformer-with-scene-inspired-mask-for-soft-sound-event-detection-2311.14068</loc><lastmod>2023-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-dual-conformer-with-scene-inspired-mask-for-soft-sound-event-detection-2311.14068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-dual-conformer-with-scene-inspired-mask-for-soft-sound-event-detection-2311.14068"/></url>
<url><loc>https://scifaro.com/en/abs/ser-ampel-a-multi-source-dataset-for-speech-emotion-recognition-of-italian-older-adults-2311.14483</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ser-ampel-a-multi-source-dataset-for-speech-emotion-recognition-of-italian-older-adults-2311.14483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ser-ampel-a-multi-source-dataset-for-speech-emotion-recognition-of-italian-older-adults-2311.14483"/></url>
<url><loc>https://scifaro.com/en/abs/learning-arousal-valence-representation-from-categorical-emotion-labels-of-speech-2311.14816</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-arousal-valence-representation-from-categorical-emotion-labels-of-speech-2311.14816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-arousal-valence-representation-from-categorical-emotion-labels-of-speech-2311.14816"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-diarization-for-meeting-transcription-with-ad-hoc-acoustic-sensor-networks-2311.15597</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-diarization-for-meeting-transcription-with-ad-hoc-acoustic-sensor-networks-2311.15597"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-diarization-for-meeting-transcription-with-ad-hoc-acoustic-sensor-networks-2311.15597"/></url>
<url><loc>https://scifaro.com/en/abs/ultrasensitive-textile-strain-sensors-redefine-wearable-silent-speech-interfaces-with-high-machine-learning-efficiency-2311.15683</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultrasensitive-textile-strain-sensors-redefine-wearable-silent-speech-interfaces-with-high-machine-learning-efficiency-2311.15683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultrasensitive-textile-strain-sensors-redefine-wearable-silent-speech-interfaces-with-high-machine-learning-efficiency-2311.15683"/></url>
<url><loc>https://scifaro.com/en/abs/voice-anonymization-for-all-bias-evaluation-of-the-voice-privacy-challenge-baseline-system-2311.15804</loc><lastmod>2023-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-anonymization-for-all-bias-evaluation-of-the-voice-privacy-challenge-baseline-system-2311.15804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-anonymization-for-all-bias-evaluation-of-the-voice-privacy-challenge-baseline-system-2311.15804"/></url>
<url><loc>https://scifaro.com/en/abs/lc4sv-a-denoising-framework-learning-to-compensate-for-unseen-speaker-verification-models-2311.16604</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lc4sv-a-denoising-framework-learning-to-compensate-for-unseen-speaker-verification-models-2311.16604"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lc4sv-a-denoising-framework-learning-to-compensate-for-unseen-speaker-verification-models-2311.16604"/></url>
<url><loc>https://scifaro.com/en/abs/imagls-interaural-level-difference-with-magnitude-least-squares-loss-for-optimized-first-order-head-related-transfer-function-2311.16702</loc><lastmod>2024-04-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/imagls-interaural-level-difference-with-magnitude-least-squares-loss-for-optimized-first-order-head-related-transfer-function-2311.16702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/imagls-interaural-level-difference-with-magnitude-least-squares-loss-for-optimized-first-order-head-related-transfer-function-2311.16702"/></url>
<url><loc>https://scifaro.com/en/abs/study-of-speaker-localization-under-dynamic-and-reverberant-environments-2311.16927</loc><lastmod>2023-11-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-of-speaker-localization-under-dynamic-and-reverberant-environments-2311.16927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-of-speaker-localization-under-dynamic-and-reverberant-environments-2311.16927"/></url>
<url><loc>https://scifaro.com/en/abs/turbocharge-speech-understanding-with-pilot-inference-2311.17065</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/turbocharge-speech-understanding-with-pilot-inference-2311.17065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/turbocharge-speech-understanding-with-pilot-inference-2311.17065"/></url>
<url><loc>https://scifaro.com/en/abs/adapting-openai-s-whisper-for-speech-recognition-on-code-switch-mandarin-english-seame-and-asru2019-datasets-2311.17382</loc><lastmod>2023-11-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adapting-openai-s-whisper-for-speech-recognition-on-code-switch-mandarin-english-seame-and-asru2019-datasets-2311.17382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adapting-openai-s-whisper-for-speech-recognition-on-code-switch-mandarin-english-seame-and-asru2019-datasets-2311.17382"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-generative-adversarial-networks-for-unseen-word-generation-from-eeg-signals-2311.17923</loc><lastmod>2023-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-generative-adversarial-networks-for-unseen-word-generation-from-eeg-signals-2311.17923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-generative-adversarial-networks-for-unseen-word-generation-from-eeg-signals-2311.17923"/></url>
<url><loc>https://scifaro.com/en/abs/speech-understanding-on-tiny-devices-with-a-learning-cache-2311.18188</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-understanding-on-tiny-devices-with-a-learning-cache-2311.18188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-understanding-on-tiny-devices-with-a-learning-cache-2311.18188"/></url>
<url><loc>https://scifaro.com/en/abs/audio-prompt-tuning-for-universal-sound-separation-2311.18399</loc><lastmod>2023-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-prompt-tuning-for-universal-sound-separation-2311.18399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-prompt-tuning-for-universal-sound-separation-2311.18399"/></url>
<url><loc>https://scifaro.com/en/abs/subspace-hybrid-mvdr-beamforming-for-augmented-hearing-2311.18689</loc><lastmod>2023-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/subspace-hybrid-mvdr-beamforming-for-augmented-hearing-2311.18689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/subspace-hybrid-mvdr-beamforming-for-augmented-hearing-2311.18689"/></url>
<url><loc>https://scifaro.com/en/abs/an-aliasing-free-hybrid-digital-analog-polyphonic-synthesizer-2311.18774</loc><lastmod>2023-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-aliasing-free-hybrid-digital-analog-polyphonic-synthesizer-2311.18774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-aliasing-free-hybrid-digital-analog-polyphonic-synthesizer-2311.18774"/></url>
<url><loc>https://scifaro.com/en/abs/compression-of-end-to-end-non-autoregressive-image-to-speech-system-for-low-resourced-devices-2312.00174</loc><lastmod>2023-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/compression-of-end-to-end-non-autoregressive-image-to-speech-system-for-low-resourced-devices-2312.00174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/compression-of-end-to-end-non-autoregressive-image-to-speech-system-for-low-resourced-devices-2312.00174"/></url>
<url><loc>https://scifaro.com/en/abs/learning-domain-invariant-classifiers-for-infant-cry-sounds-2312.00231</loc><lastmod>2023-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-domain-invariant-classifiers-for-infant-cry-sounds-2312.00231"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-domain-invariant-classifiers-for-infant-cry-sounds-2312.00231"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-prompt-tuning-empowering-large-language-models-with-audition-capabilities-2312.00249</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-prompt-tuning-empowering-large-language-models-with-audition-capabilities-2312.00249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-prompt-tuning-empowering-large-language-models-with-audition-capabilities-2312.00249"/></url>
<url><loc>https://scifaro.com/en/abs/spire-sies-a-spontaneous-indian-english-speech-corpus-2312.00698</loc><lastmod>2023-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spire-sies-a-spontaneous-indian-english-speech-corpus-2312.00698"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spire-sies-a-spontaneous-indian-english-speech-corpus-2312.00698"/></url>
<url><loc>https://scifaro.com/en/abs/sefgan-harvesting-the-power-of-normalizing-flows-and-gans-for-efficient-high-quality-speech-enhancement-2312.01744</loc><lastmod>2023-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sefgan-harvesting-the-power-of-normalizing-flows-and-gans-for-efficient-high-quality-speech-enhancement-2312.01744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sefgan-harvesting-the-power-of-normalizing-flows-and-gans-for-efficient-high-quality-speech-enhancement-2312.01744"/></url>
<url><loc>https://scifaro.com/en/abs/head-orientation-estimation-with-distributed-microphones-using-speech-radiation-patterns-2312.01808</loc><lastmod>2026-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/head-orientation-estimation-with-distributed-microphones-using-speech-radiation-patterns-2312.01808"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/head-orientation-estimation-with-distributed-microphones-using-speech-radiation-patterns-2312.01808"/></url>
<url><loc>https://scifaro.com/en/abs/auralization-based-on-multi-perspective-ambisonic-room-impulse-responses-2312.02581</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auralization-based-on-multi-perspective-ambisonic-room-impulse-responses-2312.02581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auralization-based-on-multi-perspective-ambisonic-room-impulse-responses-2312.02581"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-speech-enhancement-in-matched-and-mismatched-conditions-using-a-heun-based-sampler-2312.02683</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-in-matched-and-mismatched-conditions-using-a-heun-based-sampler-2312.02683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-in-matched-and-mismatched-conditions-using-a-heun-based-sampler-2312.02683"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-speech-dereverberation-using-weighted-prediction-error-2312.03034</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-speech-dereverberation-using-weighted-prediction-error-2312.03034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-speech-dereverberation-using-weighted-prediction-error-2312.03034"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-laryngograph-data-for-robust-voicing-detection-in-speech-2312.03129</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-laryngograph-data-for-robust-voicing-detection-in-speech-2312.03129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-laryngograph-data-for-robust-voicing-detection-in-speech-2312.03129"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-speaker-verification-using-transformation-module-with-feature-partition-and-fusion-2312.03324</loc><lastmod>2023-12-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-speaker-verification-using-transformation-module-with-feature-partition-and-fusion-2312.03324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-speaker-verification-using-transformation-module-with-feature-partition-and-fusion-2312.03324"/></url>
<url><loc>https://scifaro.com/en/abs/golden-gemini-is-all-you-need-finding-the-sweet-spots-for-speaker-verification-2312.03620</loc><lastmod>2024-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/golden-gemini-is-all-you-need-finding-the-sweet-spots-for-speaker-verification-2312.03620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/golden-gemini-is-all-you-need-finding-the-sweet-spots-for-speaker-verification-2312.03620"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-pre-trained-speech-and-language-models-for-end-to-end-speech-recognition-2312.03668</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-pre-trained-speech-and-language-models-for-end-to-end-speech-recognition-2312.03668"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-pre-trained-speech-and-language-models-for-end-to-end-speech-recognition-2312.03668"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-audio-spectrogram-transformers-2312.03694</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-audio-spectrogram-transformers-2312.03694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-of-audio-spectrogram-transformers-2312.03694"/></url>
<url><loc>https://scifaro.com/en/abs/joint-training-or-not-an-exploration-of-pre-trained-speech-models-in-audio-visual-speaker-diarization-2312.04131</loc><lastmod>2023-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-training-or-not-an-exploration-of-pre-trained-speech-models-in-audio-visual-speaker-diarization-2312.04131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-training-or-not-an-exploration-of-pre-trained-speech-models-in-audio-visual-speaker-diarization-2312.04131"/></url>
<url><loc>https://scifaro.com/en/abs/diaper-end-to-end-neural-diarization-with-perceiver-based-attractors-2312.04324</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diaper-end-to-end-neural-diarization-with-perceiver-based-attractors-2312.04324"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diaper-end-to-end-neural-diarization-with-perceiver-based-attractors-2312.04324"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-design-space-of-diffusion-models-for-speech-enhancement-2312.04370</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-design-space-of-diffusion-models-for-speech-enhancement-2312.04370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-design-space-of-diffusion-models-for-speech-enhancement-2312.04370"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-multichannel-blind-speaker-separation-with-a-causal-low-latency-and-low-complexity-approach-2312.05173</loc><lastmod>2023-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-multichannel-blind-speaker-separation-with-a-causal-low-latency-and-low-complexity-approach-2312.05173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-multichannel-blind-speaker-separation-with-a-causal-low-latency-and-low-complexity-approach-2312.05173"/></url>
<url><loc>https://scifaro.com/en/abs/eend-demux-end-to-end-neural-speaker-diarization-via-demultiplexed-speaker-embeddings-2312.06065</loc><lastmod>2023-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eend-demux-end-to-end-neural-speaker-diarization-via-demultiplexed-speaker-embeddings-2312.06065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eend-demux-end-to-end-neural-speaker-diarization-via-demultiplexed-speaker-embeddings-2312.06065"/></url>
<url><loc>https://scifaro.com/en/abs/testing-correctness-fairness-and-robustness-of-speech-emotion-recognition-models-2312.06270</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/testing-correctness-fairness-and-robustness-of-speech-emotion-recognition-models-2312.06270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/testing-correctness-fairness-and-robustness-of-speech-emotion-recognition-models-2312.06270"/></url>
<url><loc>https://scifaro.com/en/abs/w2v-seld-a-sound-event-localization-and-detection-framework-for-self-supervised-spatial-audio-pre-training-2312.06907</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/w2v-seld-a-sound-event-localization-and-detection-framework-for-self-supervised-spatial-audio-pre-training-2312.06907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/w2v-seld-a-sound-event-localization-and-detection-framework-for-self-supervised-spatial-audio-pre-training-2312.06907"/></url>
<url><loc>https://scifaro.com/en/abs/neuroheed-improving-neuro-steered-speaker-extraction-with-joint-auditory-attention-detection-2312.07513</loc><lastmod>2023-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuroheed-improving-neuro-steered-speaker-extraction-with-joint-auditory-attention-detection-2312.07513"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuroheed-improving-neuro-steered-speaker-extraction-with-joint-auditory-attention-detection-2312.07513"/></url>
<url><loc>https://scifaro.com/en/abs/audio-deepfake-detection-with-self-supervised-wavlm-and-multi-fusion-attentive-classifier-2312.08089</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-deepfake-detection-with-self-supervised-wavlm-and-multi-fusion-attentive-classifier-2312.08089"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-deepfake-detection-with-self-supervised-wavlm-and-multi-fusion-attentive-classifier-2312.08089"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-low-complexity-deep-learning-based-noise-suppression-2312.08132</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-low-complexity-deep-learning-based-noise-suppression-2312.08132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-low-complexity-deep-learning-based-noise-suppression-2312.08132"/></url>
<url><loc>https://scifaro.com/en/abs/metrological-support-of-acoustic-measuring-installations-mid-frequency-devices-2312.08496</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metrological-support-of-acoustic-measuring-installations-mid-frequency-devices-2312.08496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metrological-support-of-acoustic-measuring-installations-mid-frequency-devices-2312.08496"/></url>
<url><loc>https://scifaro.com/en/abs/usm-lite-quantization-and-sparsity-aware-fine-tuning-for-speech-recognition-with-universal-speech-models-2312.08553</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usm-lite-quantization-and-sparsity-aware-fine-tuning-for-speech-recognition-with-universal-speech-models-2312.08553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usm-lite-quantization-and-sparsity-aware-fine-tuning-for-speech-recognition-with-universal-speech-models-2312.08553"/></url>
<url><loc>https://scifaro.com/en/abs/next-tdnn-modernizing-multi-scale-temporal-convolution-backbone-for-speaker-verification-2312.08603</loc><lastmod>2026-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/next-tdnn-modernizing-multi-scale-temporal-convolution-backbone-for-speaker-verification-2312.08603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/next-tdnn-modernizing-multi-scale-temporal-convolution-backbone-for-speaker-verification-2312.08603"/></url>
<url><loc>https://scifaro.com/en/abs/a-computationally-efficient-semi-blind-source-separation-based-approach-for-nonlinear-echo-cancellation-based-on-an-element-wise-iterative-source-steering-2312.08610</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-computationally-efficient-semi-blind-source-separation-based-approach-for-nonlinear-echo-cancellation-based-on-an-element-wise-iterative-source-steering-2312.08610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-computationally-efficient-semi-blind-source-separation-based-approach-for-nonlinear-echo-cancellation-based-on-an-element-wise-iterative-source-steering-2312.08610"/></url>
<url><loc>https://scifaro.com/en/abs/scalable-ensemble-based-detection-method-against-adversarial-attacks-for-speaker-verification-2312.08622</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scalable-ensemble-based-detection-method-against-adversarial-attacks-for-speaker-verification-2312.08622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scalable-ensemble-based-detection-method-against-adversarial-attacks-for-speaker-verification-2312.08622"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automatic-data-augmentation-for-disordered-speech-recognition-2312.08641</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automatic-data-augmentation-for-disordered-speech-recognition-2312.08641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automatic-data-augmentation-for-disordered-speech-recognition-2312.08641"/></url>
<url><loc>https://scifaro.com/en/abs/reconstruction-of-sound-field-through-diffusion-models-2312.08821</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reconstruction-of-sound-field-through-diffusion-models-2312.08821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reconstruction-of-sound-field-through-diffusion-models-2312.08821"/></url>
<url><loc>https://scifaro.com/en/abs/attention-guided-adaptation-for-code-switching-speech-recognition-2312.08856</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-guided-adaptation-for-code-switching-speech-recognition-2312.08856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-guided-adaptation-for-code-switching-speech-recognition-2312.08856"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-noise-data-augmentation-for-dnn-based-own-voice-reconstruction-for-hearables-in-noisy-environments-2312.08908</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-noise-data-augmentation-for-dnn-based-own-voice-reconstruction-for-hearables-in-noisy-environments-2312.08908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-noise-data-augmentation-for-dnn-based-own-voice-reconstruction-for-hearables-in-noisy-environments-2312.08908"/></url>
<url><loc>https://scifaro.com/en/abs/design-construction-and-evaluation-of-emotional-multimodal-pathological-speech-database-2312.08998</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-construction-and-evaluation-of-emotional-multimodal-pathological-speech-database-2312.08998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-construction-and-evaluation-of-emotional-multimodal-pathological-speech-database-2312.08998"/></url>
<url><loc>https://scifaro.com/en/abs/fusion-of-audio-and-visual-embeddings-for-sound-event-localization-and-detection-2312.09034</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusion-of-audio-and-visual-embeddings-for-sound-event-localization-and-detection-2312.09034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusion-of-audio-and-visual-embeddings-for-sound-event-localization-and-detection-2312.09034"/></url>
<url><loc>https://scifaro.com/en/abs/fastinject-injecting-unpaired-text-data-into-ctc-based-asr-training-2312.09100</loc><lastmod>2023-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastinject-injecting-unpaired-text-data-into-ctc-based-asr-training-2312.09100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastinject-injecting-unpaired-text-data-into-ctc-based-asr-training-2312.09100"/></url>
<url><loc>https://scifaro.com/en/abs/ir-uwb-radar-based-contactless-silent-speech-recognition-of-vowels-consonants-words-and-phrases-2312.09572</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ir-uwb-radar-based-contactless-silent-speech-recognition-of-vowels-consonants-words-and-phrases-2312.09572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ir-uwb-radar-based-contactless-silent-speech-recognition-of-vowels-consonants-words-and-phrases-2312.09572"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-learning-for-anomalous-sound-detection-2312.09578</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-learning-for-anomalous-sound-detection-2312.09578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-learning-for-anomalous-sound-detection-2312.09578"/></url>
<url><loc>https://scifaro.com/en/abs/a-deep-representation-learning-based-speech-enhancement-method-using-complex-convolution-recurrent-variational-autoencoder-2312.09620</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-deep-representation-learning-based-speech-enhancement-method-using-complex-convolution-recurrent-variational-autoencoder-2312.09620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-deep-representation-learning-based-speech-enhancement-method-using-complex-convolution-recurrent-variational-autoencoder-2312.09620"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuned-self-supervised-speech-representations-for-language-diarization-in-multilingual-code-switched-speech-2312.09645</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuned-self-supervised-speech-representations-for-language-diarization-in-multilingual-code-switched-speech-2312.09645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuned-self-supervised-speech-representations-for-language-diarization-in-multilingual-code-switched-speech-2312.09645"/></url>
<url><loc>https://scifaro.com/en/abs/toward-deep-drum-source-separation-2312.09663</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-deep-drum-source-separation-2312.09663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-deep-drum-source-separation-2312.09663"/></url>
<url><loc>https://scifaro.com/en/abs/selm-speech-enhancement-using-discrete-tokens-and-language-models-2312.09747</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selm-speech-enhancement-using-discrete-tokens-and-language-models-2312.09747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selm-speech-enhancement-using-discrete-tokens-and-language-models-2312.09747"/></url>
<url><loc>https://scifaro.com/en/abs/u2-kws-unified-two-pass-open-vocabulary-keyword-spotting-with-keyword-bias-2312.09760</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/u2-kws-unified-two-pass-open-vocabulary-keyword-spotting-with-keyword-bias-2312.09760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/u2-kws-unified-two-pass-open-vocabulary-keyword-spotting-with-keyword-bias-2312.09760"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-envelope-and-frequency-following-eeg-responses-to-continuous-speech-using-deep-neural-networks-2312.09768</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-envelope-and-frequency-following-eeg-responses-to-continuous-speech-using-deep-neural-networks-2312.09768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-envelope-and-frequency-following-eeg-responses-to-continuous-speech-using-deep-neural-networks-2312.09768"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-graph-learning-for-audio-event-classification-and-human-perceived-annoyance-rating-prediction-2312.09952</loc><lastmod>2023-12-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-graph-learning-for-audio-event-classification-and-human-perceived-annoyance-rating-prediction-2312.09952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-graph-learning-for-audio-event-classification-and-human-perceived-annoyance-rating-prediction-2312.09952"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-the-entropy-semiring-for-neural-speech-recognition-2312.10087</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-the-entropy-semiring-for-neural-speech-recognition-2312.10087"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-the-entropy-semiring-for-neural-speech-recognition-2312.10087"/></url>
<url><loc>https://scifaro.com/en/abs/on-robustness-to-missing-video-for-audiovisual-speech-recognition-2312.10088</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-robustness-to-missing-video-for-audiovisual-speech-recognition-2312.10088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-robustness-to-missing-video-for-audiovisual-speech-recognition-2312.10088"/></url>
<url><loc>https://scifaro.com/en/abs/mm-tts-multi-modal-prompt-based-style-transfer-for-expressive-text-to-speech-synthesis-2312.10687</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mm-tts-multi-modal-prompt-based-style-transfer-for-expressive-text-to-speech-synthesis-2312.10687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mm-tts-multi-modal-prompt-based-style-transfer-for-expressive-text-to-speech-synthesis-2312.10687"/></url>
<url><loc>https://scifaro.com/en/abs/stylesinger-style-transfer-for-out-of-domain-singing-voice-synthesis-2312.10741</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylesinger-style-transfer-for-out-of-domain-singing-voice-synthesis-2312.10741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylesinger-style-transfer-for-out-of-domain-singing-voice-synthesis-2312.10741"/></url>
<url><loc>https://scifaro.com/en/abs/attention-driven-multichannel-speech-enhancement-in-moving-sound-source-scenarios-2312.10756</loc><lastmod>2023-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-driven-multichannel-speech-enhancement-in-moving-sound-source-scenarios-2312.10756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-driven-multichannel-speech-enhancement-in-moving-sound-source-scenarios-2312.10756"/></url>
<url><loc>https://scifaro.com/en/abs/a-refining-underlying-information-framework-for-monaural-speech-enhancement-2312.11201</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-refining-underlying-information-framework-for-monaural-speech-enhancement-2312.11201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-refining-underlying-information-framework-for-monaural-speech-enhancement-2312.11201"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-speech-in-speech-perception-via-a-humanoid-robot-2312.12262</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-speech-in-speech-perception-via-a-humanoid-robot-2312.12262"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-speech-in-speech-perception-via-a-humanoid-robot-2312.12262"/></url>
<url><loc>https://scifaro.com/en/abs/on-real-time-multi-stage-speech-enhancement-systems-2312.12415</loc><lastmod>2023-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-real-time-multi-stage-speech-enhancement-systems-2312.12415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-real-time-multi-stage-speech-enhancement-systems-2312.12415"/></url>
<url><loc>https://scifaro.com/en/abs/lattice-rescoring-based-on-large-ensemble-of-complementary-neural-language-models-2312.12764</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lattice-rescoring-based-on-large-ensemble-of-complementary-neural-language-models-2312.12764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lattice-rescoring-based-on-large-ensemble-of-complementary-neural-language-models-2312.12764"/></url>
<url><loc>https://scifaro.com/en/abs/stable-distillation-regularizing-continued-pre-training-for-low-resource-automatic-speech-recognition-2312.12783</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stable-distillation-regularizing-continued-pre-training-for-low-resource-automatic-speech-recognition-2312.12783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stable-distillation-regularizing-continued-pre-training-for-low-resource-automatic-speech-recognition-2312.12783"/></url>
<url><loc>https://scifaro.com/en/abs/unconstrained-dysfluency-modeling-for-dysfluent-speech-transcription-and-detection-2312.12810</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unconstrained-dysfluency-modeling-for-dysfluent-speech-transcription-and-detection-2312.12810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unconstrained-dysfluency-modeling-for-dysfluent-speech-transcription-and-detection-2312.12810"/></url>
<url><loc>https://scifaro.com/en/abs/cst-former-transformer-with-channel-spectro-temporal-attention-for-sound-event-localization-and-detection-2312.12821</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cst-former-transformer-with-channel-spectro-temporal-attention-for-sound-event-localization-and-detection-2312.12821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cst-former-transformer-with-channel-spectro-temporal-attention-for-sound-event-localization-and-detection-2312.12821"/></url>
<url><loc>https://scifaro.com/en/abs/fusdom-combining-in-domain-and-out-of-domain-knowledge-for-continuous-self-supervised-learning-2312.13026</loc><lastmod>2023-12-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusdom-combining-in-domain-and-out-of-domain-knowledge-for-continuous-self-supervised-learning-2312.13026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusdom-combining-in-domain-and-out-of-domain-knowledge-for-continuous-self-supervised-learning-2312.13026"/></url>
<url><loc>https://scifaro.com/en/abs/braintalker-low-resource-brain-to-speech-synthesis-with-transfer-learning-using-wav2vec-2-0-2312.13600</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/braintalker-low-resource-brain-to-speech-synthesis-with-transfer-learning-using-wav2vec-2-0-2312.13600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/braintalker-low-resource-brain-to-speech-synthesis-with-transfer-learning-using-wav2vec-2-0-2312.13600"/></url>
<url><loc>https://scifaro.com/en/abs/style-modeling-for-multi-speaker-articulation-to-speech-2312.13603</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/style-modeling-for-multi-speaker-articulation-to-speech-2312.13603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/style-modeling-for-multi-speaker-articulation-to-speech-2312.13603"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-complex-network-for-machine-sound-anomaly-detection-2312.13615</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-complex-network-for-machine-sound-anomaly-detection-2312.13615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-complex-network-for-machine-sound-anomaly-detection-2312.13615"/></url>
<url><loc>https://scifaro.com/en/abs/blind-localization-of-room-reflections-with-application-to-spatial-audio-2312.13707</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-localization-of-room-reflections-with-application-to-spatial-audio-2312.13707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-localization-of-room-reflections-with-application-to-spatial-audio-2312.13707"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-visual-supervision-for-array-based-active-speaker-detection-and-localization-2312.14021</loc><lastmod>2023-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-visual-supervision-for-array-based-active-speaker-detection-and-localization-2312.14021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-visual-supervision-for-array-based-active-speaker-detection-and-localization-2312.14021"/></url>
<url><loc>https://scifaro.com/en/abs/the-umbomic-a-pvdf-cantilever-microphone-2312.14339</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-umbomic-a-pvdf-cantilever-microphone-2312.14339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-umbomic-a-pvdf-cantilever-microphone-2312.14339"/></url>
<url><loc>https://scifaro.com/en/abs/noise-morphing-for-audio-time-stretching-2312.14586</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-morphing-for-audio-time-stretching-2312.14586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-morphing-for-audio-time-stretching-2312.14586"/></url>
<url><loc>https://scifaro.com/en/abs/blstm-based-confidence-estimation-for-end-to-end-speech-recognition-2312.14609</loc><lastmod>2023-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blstm-based-confidence-estimation-for-end-to-end-speech-recognition-2312.14609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blstm-based-confidence-estimation-for-end-to-end-speech-recognition-2312.14609"/></url>
<url><loc>https://scifaro.com/en/abs/an-implantable-piezofilm-middle-ear-microphone-performance-in-human-cadaveric-temporal-bones-2312.14844</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-implantable-piezofilm-middle-ear-microphone-performance-in-human-cadaveric-temporal-bones-2312.14844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-implantable-piezofilm-middle-ear-microphone-performance-in-human-cadaveric-temporal-bones-2312.14844"/></url>
<url><loc>https://scifaro.com/en/abs/consistent-and-relevant-rethink-the-query-embedding-in-general-sound-separation-2312.15463</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistent-and-relevant-rethink-the-query-embedding-in-general-sound-separation-2312.15463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistent-and-relevant-rethink-the-query-embedding-in-general-sound-separation-2312.15463"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-data-augmentation-in-bias-mitigation-against-non-native-accented-speech-2312.15499</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-data-augmentation-in-bias-mitigation-against-non-native-accented-speech-2312.15499"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-data-augmentation-in-bias-mitigation-against-non-native-accented-speech-2312.15499"/></url>
<url><loc>https://scifaro.com/en/abs/the-nus-hlt-system-for-icassp2024-icmc-asr-grand-challenge-2312.16002</loc><lastmod>2023-12-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-nus-hlt-system-for-icassp2024-icmc-asr-grand-challenge-2312.16002"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-nus-hlt-system-for-icassp2024-icmc-asr-grand-challenge-2312.16002"/></url>
<url><loc>https://scifaro.com/en/abs/selective-memory-meta-learning-with-environment-representations-for-sound-event-localization-and-detection-2312.16422</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selective-memory-meta-learning-with-environment-representations-for-sound-event-localization-and-detection-2312.16422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selective-memory-meta-learning-with-environment-representations-for-sound-event-localization-and-detection-2312.16422"/></url>
<url><loc>https://scifaro.com/en/abs/online-similarity-and-independence-aware-beamformer-for-low-latency-target-sound-extraction-2312.16449</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-similarity-and-independence-aware-beamformer-for-low-latency-target-sound-extraction-2312.16449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-similarity-and-independence-aware-beamformer-for-low-latency-target-sound-extraction-2312.16449"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-quantification-in-machine-learning-for-joint-speaker-diarization-and-identification-2312.16763</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-quantification-in-machine-learning-for-joint-speaker-diarization-and-identification-2312.16763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-quantification-in-machine-learning-for-joint-speaker-diarization-and-identification-2312.16763"/></url>
<url><loc>https://scifaro.com/en/abs/a-new-perspective-on-speaker-verification-joint-modeling-with-dfsmn-and-transformer-2312.16826</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-new-perspective-on-speaker-verification-joint-modeling-with-dfsmn-and-transformer-2312.16826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-new-perspective-on-speaker-verification-joint-modeling-with-dfsmn-and-transformer-2312.16826"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-recording-methods-with-analysis-on-inter-aural-time-level-and-phase-differences-2312.16884</loc><lastmod>2023-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-recording-methods-with-analysis-on-inter-aural-time-level-and-phase-differences-2312.16884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-recording-methods-with-analysis-on-inter-aural-time-level-and-phase-differences-2312.16884"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-speech-enhancement-using-learnable-loss-mixup-2312.17255</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-learnable-loss-mixup-2312.17255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-speech-enhancement-using-learnable-loss-mixup-2312.17255"/></url>
<url><loc>https://scifaro.com/en/abs/attention-based-interactive-disentangling-network-for-instance-level-emotional-voice-conversion-2312.17508</loc><lastmod>2024-01-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-based-interactive-disentangling-network-for-instance-level-emotional-voice-conversion-2312.17508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-based-interactive-disentangling-network-for-instance-level-emotional-voice-conversion-2312.17508"/></url>
<url><loc>https://scifaro.com/en/abs/odaq-open-dataset-of-audio-quality-2401.00197</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/odaq-open-dataset-of-audio-quality-2401.00197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/odaq-open-dataset-of-audio-quality-2401.00197"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-dysarthria-speech-feature-representation-with-empirical-mode-decomposition-and-walsh-hadamard-transform-2401.00225</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-dysarthria-speech-feature-representation-with-empirical-mode-decomposition-and-walsh-hadamard-transform-2401.00225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-dysarthria-speech-feature-representation-with-empirical-mode-decomposition-and-walsh-hadamard-transform-2401.00225"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-zero-shot-generalizability-on-mandarin-english-code-switched-asr-and-speech-to-text-translation-of-recent-foundation-models-with-self-supervision-and-weak-supervision-2401.00273</loc><lastmod>2024-01-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-zero-shot-generalizability-on-mandarin-english-code-switched-asr-and-speech-to-text-translation-of-recent-foundation-models-with-self-supervision-and-weak-supervision-2401.00273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-zero-shot-generalizability-on-mandarin-english-code-switched-asr-and-speech-to-text-translation-of-recent-foundation-models-with-self-supervision-and-weak-supervision-2401.00273"/></url>
<url><loc>https://scifaro.com/en/abs/ultraspherical-gegenbauer-polynomials-to-unify-2d-3d-ambisonic-directivity-designs-2401.00813</loc><lastmod>2024-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultraspherical-gegenbauer-polynomials-to-unify-2d-3d-ambisonic-directivity-designs-2401.00813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultraspherical-gegenbauer-polynomials-to-unify-2d-3d-ambisonic-directivity-designs-2401.00813"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-the-presence-of-sperm-whales-echolocation-clicks-in-noisy-environments-2401.00900</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-the-presence-of-sperm-whales-echolocation-clicks-in-noisy-environments-2401.00900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-the-presence-of-sperm-whales-echolocation-clicks-in-noisy-environments-2401.00900"/></url>
<url><loc>https://scifaro.com/en/abs/the-role-of-direct-sound-spherical-harmonics-representation-in-externalization-using-binaural-reproduction-2401.00936</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-role-of-direct-sound-spherical-harmonics-representation-in-externalization-using-binaural-reproduction-2401.00936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-role-of-direct-sound-spherical-harmonics-representation-in-externalization-using-binaural-reproduction-2401.00936"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-parallel-audio-generation-using-group-masked-language-modeling-2401.01099</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-parallel-audio-generation-using-group-masked-language-modeling-2401.01099"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-parallel-audio-generation-using-group-masked-language-modeling-2401.01099"/></url>
<url><loc>https://scifaro.com/en/abs/haaqi-net-a-non-intrusive-neural-music-audio-quality-assessment-model-for-hearing-aids-2401.01145</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/haaqi-net-a-non-intrusive-neural-music-audio-quality-assessment-model-for-hearing-aids-2401.01145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/haaqi-net-a-non-intrusive-neural-music-audio-quality-assessment-model-for-hearing-aids-2401.01145"/></url>
<url><loc>https://scifaro.com/en/abs/room-impulse-response-reconstruction-with-physics-informed-deep-learning-2401.01206</loc><lastmod>2024-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-impulse-response-reconstruction-with-physics-informed-deep-learning-2401.01206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-impulse-response-reconstruction-with-physics-informed-deep-learning-2401.01206"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-parameter-estimation-of-sinusoidal-models-for-speech-and-audio-signals-2401.01255</loc><lastmod>2026-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-parameter-estimation-of-sinusoidal-models-for-speech-and-audio-signals-2401.01255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-parameter-estimation-of-sinusoidal-models-for-speech-and-audio-signals-2401.01255"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-reflective-learning-through-self-distillation-and-online-clustering-for-speaker-representation-learning-2401.01473</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-reflective-learning-through-self-distillation-and-online-clustering-for-speaker-representation-learning-2401.01473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-reflective-learning-through-self-distillation-and-online-clustering-for-speaker-representation-learning-2401.01473"/></url>
<url><loc>https://scifaro.com/en/abs/utilizing-neural-transducers-for-two-stage-text-to-speech-via-semantic-token-prediction-2401.01498</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/utilizing-neural-transducers-for-two-stage-text-to-speech-via-semantic-token-prediction-2401.01498"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/utilizing-neural-transducers-for-two-stage-text-to-speech-via-semantic-token-prediction-2401.01498"/></url>
<url><loc>https://scifaro.com/en/abs/comosvc-consistency-model-based-singing-voice-conversion-2401.01792</loc><lastmod>2024-01-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comosvc-consistency-model-based-singing-voice-conversion-2401.01792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comosvc-consistency-model-based-singing-voice-conversion-2401.01792"/></url>
<url><loc>https://scifaro.com/en/abs/ctc-blank-triggered-dynamic-layer-skipping-for-efficient-ctc-based-speech-recognition-2401.02046</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctc-blank-triggered-dynamic-layer-skipping-for-efficient-ctc-based-speech-recognition-2401.02046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctc-blank-triggered-dynamic-layer-skipping-for-efficient-ctc-based-speech-recognition-2401.02046"/></url>
<url><loc>https://scifaro.com/en/abs/listening-broadband-physical-model-for-microphones-a-first-step-2401.02164</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-broadband-physical-model-for-microphones-a-first-step-2401.02164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-broadband-physical-model-for-microphones-a-first-step-2401.02164"/></url>
<url><loc>https://scifaro.com/en/abs/optimal-real-weighted-beamforming-with-application-to-linear-and-spherical-arrays-2401.02285</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimal-real-weighted-beamforming-with-application-to-linear-and-spherical-arrays-2401.02285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimal-real-weighted-beamforming-with-application-to-linear-and-spherical-arrays-2401.02285"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-estimation-using-microphone-array-processing-for-moving-humanoid-robots-2401.02386</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-using-microphone-array-processing-for-moving-humanoid-robots-2401.02386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-estimation-using-microphone-array-processing-for-moving-humanoid-robots-2401.02386"/></url>
<url><loc>https://scifaro.com/en/abs/task-oriented-dialogue-as-a-catalyst-for-self-supervised-automatic-speech-recognition-2401.02417</loc><lastmod>2024-01-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-oriented-dialogue-as-a-catalyst-for-self-supervised-automatic-speech-recognition-2401.02417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-oriented-dialogue-as-a-catalyst-for-self-supervised-automatic-speech-recognition-2401.02417"/></url>
<url><loc>https://scifaro.com/en/abs/some-clues-to-build-a-sound-analysis-relevant-to-hearing-2401.02463</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/some-clues-to-build-a-sound-analysis-relevant-to-hearing-2401.02463"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/some-clues-to-build-a-sound-analysis-relevant-to-hearing-2401.02463"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-multichannel-far-field-speech-recognition-system-combining-neural-beamforming-with-attention-based-end-to-end-model-2401.02673</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-multichannel-far-field-speech-recognition-system-combining-neural-beamforming-with-attention-based-end-to-end-model-2401.02673"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-multichannel-far-field-speech-recognition-system-combining-neural-beamforming-with-attention-based-end-to-end-model-2401.02673"/></url>
<url><loc>https://scifaro.com/en/abs/pheme-efficient-and-conversational-speech-generation-2401.02839</loc><lastmod>2024-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pheme-efficient-and-conversational-speech-generation-2401.02839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pheme-efficient-and-conversational-speech-generation-2401.02839"/></url>
<url><loc>https://scifaro.com/en/abs/streamvc-real-time-low-latency-voice-conversion-2401.03078</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamvc-real-time-low-latency-voice-conversion-2401.03078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamvc-real-time-low-latency-voice-conversion-2401.03078"/></url>
<url><loc>https://scifaro.com/en/abs/teles-temporal-lexeme-similarity-score-to-estimate-confidence-in-end-to-end-asr-2401.03251</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/teles-temporal-lexeme-similarity-score-to-estimate-confidence-in-end-to-end-asr-2401.03251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/teles-temporal-lexeme-similarity-score-to-estimate-confidence-in-end-to-end-asr-2401.03251"/></url>
<url><loc>https://scifaro.com/en/abs/theoretical-framework-for-the-optimization-of-microphone-array-configuration-for-humanoid-robot-audition-2401.03286</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theoretical-framework-for-the-optimization-of-microphone-array-configuration-for-humanoid-robot-audition-2401.03286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theoretical-framework-for-the-optimization-of-microphone-array-configuration-for-humanoid-robot-audition-2401.03286"/></url>
<url><loc>https://scifaro.com/en/abs/design-framework-for-spherical-microphone-and-loudspeaker-arrays-in-a-multiple-input-multiple-output-system-2401.03291</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-framework-for-spherical-microphone-and-loudspeaker-arrays-in-a-multiple-input-multiple-output-system-2401.03291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-framework-for-spherical-microphone-and-loudspeaker-arrays-in-a-multiple-input-multiple-output-system-2401.03291"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-reverberation-and-dereverberation-using-an-acoustic-multiple-input-multiple-output-system-2401.03441</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-reverberation-and-dereverberation-using-an-acoustic-multiple-input-multiple-output-system-2401.03441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-reverberation-and-dereverberation-using-an-acoustic-multiple-input-multiple-output-system-2401.03441"/></url>
<url><loc>https://scifaro.com/en/abs/single-microphone-speaker-separation-and-voice-activity-detection-in-noisy-and-reverberant-environments-2401.03448</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-microphone-speaker-separation-and-voice-activity-detection-in-noisy-and-reverberant-environments-2401.03448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-microphone-speaker-separation-and-voice-activity-detection-in-noisy-and-reverberant-environments-2401.03448"/></url>
<url><loc>https://scifaro.com/en/abs/modal-smoothing-for-analysis-of-room-reflections-measured-with-spherical-microphone-and-loudspeaker-arrays-2401.03458</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modal-smoothing-for-analysis-of-room-reflections-measured-with-spherical-microphone-and-loudspeaker-arrays-2401.03458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modal-smoothing-for-analysis-of-room-reflections-measured-with-spherical-microphone-and-loudspeaker-arrays-2401.03458"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-av-wav2vec2-a-framework-for-learning-multichannel-multi-modal-speech-representation-2401.03468</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-av-wav2vec2-a-framework-for-learning-multichannel-multi-modal-speech-representation-2401.03468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-av-wav2vec2-a-framework-for-learning-multichannel-multi-modal-speech-representation-2401.03468"/></url>
<url><loc>https://scifaro.com/en/abs/theory-and-investigation-of-acoustic-multiple-input-multiple-output-systems-based-on-spherical-arrays-in-a-room-2401.03493</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/theory-and-investigation-of-acoustic-multiple-input-multiple-output-systems-based-on-spherical-arrays-in-a-room-2401.03493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/theory-and-investigation-of-acoustic-multiple-input-multiple-output-systems-based-on-spherical-arrays-in-a-room-2401.03493"/></url>
<url><loc>https://scifaro.com/en/abs/eat-self-supervised-pre-training-with-efficient-audio-transformer-2401.03497</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eat-self-supervised-pre-training-with-efficient-audio-transformer-2401.03497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eat-self-supervised-pre-training-with-efficient-audio-transformer-2401.03497"/></url>
<url><loc>https://scifaro.com/en/abs/diarizationlm-speaker-diarization-post-processing-with-large-language-models-2401.03506</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diarizationlm-speaker-diarization-post-processing-with-large-language-models-2401.03506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diarizationlm-speaker-diarization-post-processing-with-large-language-models-2401.03506"/></url>
<url><loc>https://scifaro.com/en/abs/hyperbolic-distance-based-speech-separation-2401.03567</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hyperbolic-distance-based-speech-separation-2401.03567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hyperbolic-distance-based-speech-separation-2401.03567"/></url>
<url><loc>https://scifaro.com/en/abs/ddd-a-perceptually-superior-low-response-time-dnn-based-declipper-2401.03650</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddd-a-perceptually-superior-low-response-time-dnn-based-declipper-2401.03650"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddd-a-perceptually-superior-low-response-time-dnn-based-declipper-2401.03650"/></url>
<url><loc>https://scifaro.com/en/abs/bs-plcnet-band-split-packet-loss-concealment-network-with-multi-task-learning-framework-and-multi-discriminators-2401.03687</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bs-plcnet-band-split-packet-loss-concealment-network-with-multi-task-learning-framework-and-multi-discriminators-2401.03687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bs-plcnet-band-split-packet-loss-concealment-network-with-multi-task-learning-framework-and-multi-discriminators-2401.03687"/></url>
<url><loc>https://scifaro.com/en/abs/lupet-incorporating-hierarchical-information-path-into-multilingual-asr-2401.03689</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lupet-incorporating-hierarchical-information-path-into-multilingual-asr-2401.03689"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lupet-incorporating-hierarchical-information-path-into-multilingual-asr-2401.03689"/></url>
<url><loc>https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-articulation-impaired-speech-using-augmented-reconstruction-loss-2401.03816</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-articulation-impaired-speech-using-augmented-reconstruction-loss-2401.03816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/creating-personalized-synthetic-voices-from-articulation-impaired-speech-using-augmented-reconstruction-loss-2401.03816"/></url>
<url><loc>https://scifaro.com/en/abs/inverse-nonlinearity-compensation-of-hyperelastic-deformation-in-dielectric-elastomer-for-acoustic-actuation-2401.03850</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inverse-nonlinearity-compensation-of-hyperelastic-deformation-in-dielectric-elastomer-for-acoustic-actuation-2401.03850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inverse-nonlinearity-compensation-of-hyperelastic-deformation-in-dielectric-elastomer-for-acoustic-actuation-2401.03850"/></url>
<url><loc>https://scifaro.com/en/abs/exploratory-evaluation-of-speech-content-masking-2401.03936</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploratory-evaluation-of-speech-content-masking-2401.03936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploratory-evaluation-of-speech-content-masking-2401.03936"/></url>
<url><loc>https://scifaro.com/en/abs/geodesic-interpolation-of-frame-wise-speaker-embeddings-for-the-diarization-of-meeting-scenarios-2401.03963</loc><lastmod>2024-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/geodesic-interpolation-of-frame-wise-speaker-embeddings-for-the-diarization-of-meeting-scenarios-2401.03963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/geodesic-interpolation-of-frame-wise-speaker-embeddings-for-the-diarization-of-meeting-scenarios-2401.03963"/></url>
<url><loc>https://scifaro.com/en/abs/using-perceptive-subbands-analysis-to-perform-audio-scenes-cartography-2401.04127</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-perceptive-subbands-analysis-to-perform-audio-scenes-cartography-2401.04127"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-perceptive-subbands-analysis-to-perform-audio-scenes-cartography-2401.04127"/></url>
<url><loc>https://scifaro.com/en/abs/fadi-aec-fast-score-based-diffusion-model-guided-by-far-end-signal-for-acoustic-echo-cancellation-2401.04283</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fadi-aec-fast-score-based-diffusion-model-guided-by-far-end-signal-for-acoustic-echo-cancellation-2401.04283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fadi-aec-fast-score-based-diffusion-model-guided-by-far-end-signal-for-acoustic-echo-cancellation-2401.04283"/></url>
<url><loc>https://scifaro.com/en/abs/class-incremental-learning-for-multi-label-audio-classification-2401.04447</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/class-incremental-learning-for-multi-label-audio-classification-2401.04447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/class-incremental-learning-for-multi-label-audio-classification-2401.04447"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-audio-to-audio-emotion-transfer-with-speaker-disentanglement-2401.04511</loc><lastmod>2024-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-audio-to-audio-emotion-transfer-with-speaker-disentanglement-2401.04511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-audio-to-audio-emotion-transfer-with-speaker-disentanglement-2401.04511"/></url>
<url><loc>https://scifaro.com/en/abs/full-frequency-dynamic-convolution-a-physical-frequency-dependent-convolution-for-sound-event-detection-2401.04976</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/full-frequency-dynamic-convolution-a-physical-frequency-dependent-convolution-for-sound-event-detection-2401.04976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/full-frequency-dynamic-convolution-a-physical-frequency-dependent-convolution-for-sound-event-detection-2401.04976"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-linear-and-nonlinear-methods-for-decoding-selective-attention-to-speech-from-ear-eeg-recordings-2401.05187</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-linear-and-nonlinear-methods-for-decoding-selective-attention-to-speech-from-ear-eeg-recordings-2401.05187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-linear-and-nonlinear-methods-for-decoding-selective-attention-to-speech-from-ear-eeg-recordings-2401.05187"/></url>
<url><loc>https://scifaro.com/en/abs/anim-400k-a-large-scale-dataset-for-automated-end-to-end-dubbing-of-video-2401.05314</loc><lastmod>2024-01-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anim-400k-a-large-scale-dataset-for-automated-end-to-end-dubbing-of-video-2401.05314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anim-400k-a-large-scale-dataset-for-automated-end-to-end-dubbing-of-video-2401.05314"/></url>
<url><loc>https://scifaro.com/en/abs/segment-boundary-detection-via-class-entropy-measurements-in-connectionist-phoneme-recognition-2401.05717</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segment-boundary-detection-via-class-entropy-measurements-in-connectionist-phoneme-recognition-2401.05717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segment-boundary-detection-via-class-entropy-measurements-in-connectionist-phoneme-recognition-2401.05717"/></url>
<url><loc>https://scifaro.com/en/abs/localizing-acoustic-energy-in-sound-field-synthesis-by-directionally-weighted-exterior-radiation-suppression-2401.05809</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localizing-acoustic-energy-in-sound-field-synthesis-by-directionally-weighted-exterior-radiation-suppression-2401.05809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localizing-acoustic-energy-in-sound-field-synthesis-by-directionally-weighted-exterior-radiation-suppression-2401.05809"/></url>
<url><loc>https://scifaro.com/en/abs/neural-ambisonics-encoding-for-compact-irregular-microphone-arrays-2401.05916</loc><lastmod>2024-01-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-ambisonics-encoding-for-compact-irregular-microphone-arrays-2401.05916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-ambisonics-encoding-for-compact-irregular-microphone-arrays-2401.05916"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-hindi-to-english-speech-conversion-using-bark-mbart-and-a-finetuned-xlsr-wav2vec2-2401.06183</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-hindi-to-english-speech-conversion-using-bark-mbart-and-a-finetuned-xlsr-wav2vec2-2401.06183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-hindi-to-english-speech-conversion-using-bark-mbart-and-a-finetuned-xlsr-wav2vec2-2401.06183"/></url>
<url><loc>https://scifaro.com/en/abs/remixing-music-for-hearing-aids-using-ensemble-of-fine-tuned-source-separators-2401.06203</loc><lastmod>2024-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remixing-music-for-hearing-aids-using-ensemble-of-fine-tuned-source-separators-2401.06203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remixing-music-for-hearing-aids-using-ensemble-of-fine-tuned-source-separators-2401.06203"/></url>
<url><loc>https://scifaro.com/en/abs/towards-high-quality-and-efficient-speech-bandwidth-extension-with-parallel-amplitude-and-phase-prediction-2401.06387</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-high-quality-and-efficient-speech-bandwidth-extension-with-parallel-amplitude-and-phase-prediction-2401.06387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-high-quality-and-efficient-speech-bandwidth-extension-with-parallel-amplitude-and-phase-prediction-2401.06387"/></url>
<url><loc>https://scifaro.com/en/abs/contrastive-learning-with-audio-discrimination-for-customizable-keyword-spotting-in-continuous-speech-2401.06485</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contrastive-learning-with-audio-discrimination-for-customizable-keyword-spotting-in-continuous-speech-2401.06485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contrastive-learning-with-audio-discrimination-for-customizable-keyword-spotting-in-continuous-speech-2401.06485"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-behaviour-of-connectionist-speech-recognition-with-strong-latency-constraints-2401.06588</loc><lastmod>2024-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-behaviour-of-connectionist-speech-recognition-with-strong-latency-constraints-2401.06588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-behaviour-of-connectionist-speech-recognition-with-strong-latency-constraints-2401.06588"/></url>
<url><loc>https://scifaro.com/en/abs/the-npu-aslp-liauto-system-description-for-visual-speech-recognition-in-cnvsrc-2023-2401.06788</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-npu-aslp-liauto-system-description-for-visual-speech-recognition-in-cnvsrc-2023-2401.06788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-npu-aslp-liauto-system-description-for-visual-speech-recognition-in-cnvsrc-2023-2401.06788"/></url>
<url><loc>https://scifaro.com/en/abs/maximum-entropy-adversarial-audio-augmentation-for-keyword-spotting-2401.06897</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maximum-entropy-adversarial-audio-augmentation-for-keyword-spotting-2401.06897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maximum-entropy-adversarial-audio-augmentation-for-keyword-spotting-2401.06897"/></url>
<url><loc>https://scifaro.com/en/abs/construction-and-evaluation-of-mandarin-multimodal-emotional-speech-database-2401.07336</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/construction-and-evaluation-of-mandarin-multimodal-emotional-speech-database-2401.07336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/construction-and-evaluation-of-mandarin-multimodal-emotional-speech-database-2401.07336"/></url>
<url><loc>https://scifaro.com/en/abs/who-said-what-an-automated-approach-to-analyzing-speech-in-preschool-classrooms-2401.07342</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-said-what-an-automated-approach-to-analyzing-speech-in-preschool-classrooms-2401.07342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-said-what-an-automated-approach-to-analyzing-speech-in-preschool-classrooms-2401.07342"/></url>
<url><loc>https://scifaro.com/en/abs/semascore-a-new-evaluation-metric-for-automatic-speech-recognition-tasks-2401.07506</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semascore-a-new-evaluation-metric-for-automatic-speech-recognition-tasks-2401.07506"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semascore-a-new-evaluation-metric-for-automatic-speech-recognition-tasks-2401.07506"/></url>
<url><loc>https://scifaro.com/en/abs/effect-of-target-signals-and-delays-on-spatially-selective-active-noise-control-for-open-fitting-hearables-2401.07681</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effect-of-target-signals-and-delays-on-spatially-selective-active-noise-control-for-open-fitting-hearables-2401.07681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effect-of-target-signals-and-delays-on-spatially-selective-active-noise-control-for-open-fitting-hearables-2401.07681"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-frequency-fusion-mechanisms-for-binaural-direction-of-arrival-estimation-for-multiple-speakers-2401.07849</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-frequency-fusion-mechanisms-for-binaural-direction-of-arrival-estimation-for-multiple-speakers-2401.07849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-frequency-fusion-mechanisms-for-binaural-direction-of-arrival-estimation-for-multiple-speakers-2401.07849"/></url>
<url><loc>https://scifaro.com/en/abs/multi-input-multi-output-target-speaker-voice-activity-detection-for-unified-flexible-and-robust-audio-visual-speaker-diarization-2401.08052</loc><lastmod>2025-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-input-multi-output-target-speaker-voice-activity-detection-for-unified-flexible-and-robust-audio-visual-speaker-diarization-2401.08052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-input-multi-output-target-speaker-voice-activity-detection-for-unified-flexible-and-robust-audio-visual-speaker-diarization-2401.08052"/></url>
<url><loc>https://scifaro.com/en/abs/ed-tts-multi-scale-emotion-modeling-using-cross-domain-emotion-diarization-for-emotional-speech-synthesis-2401.08166</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ed-tts-multi-scale-emotion-modeling-using-cross-domain-emotion-diarization-for-emotional-speech-synthesis-2401.08166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ed-tts-multi-scale-emotion-modeling-using-cross-domain-emotion-diarization-for-emotional-speech-synthesis-2401.08166"/></url>
<url><loc>https://scifaro.com/en/abs/an-explainable-proxy-model-for-multiabel-audio-segmentation-2401.08268</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-explainable-proxy-model-for-multiabel-audio-segmentation-2401.08268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-explainable-proxy-model-for-multiabel-audio-segmentation-2401.08268"/></url>
<url><loc>https://scifaro.com/en/abs/ecapa2-a-hybrid-neural-network-architecture-and-training-strategy-for-robust-speaker-embeddings-2401.08342</loc><lastmod>2024-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ecapa2-a-hybrid-neural-network-architecture-and-training-strategy-for-robust-speaker-embeddings-2401.08342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ecapa2-a-hybrid-neural-network-architecture-and-training-strategy-for-robust-speaker-embeddings-2401.08342"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-subset-selection-for-the-weighted-prediction-error-algorithm-using-a-group-sparsity-penalty-2401.08486</loc><lastmod>2026-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-subset-selection-for-the-weighted-prediction-error-algorithm-using-a-group-sparsity-penalty-2401.08486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-subset-selection-for-the-weighted-prediction-error-algorithm-using-a-group-sparsity-penalty-2401.08486"/></url>
<url><loc>https://scifaro.com/en/abs/sub-band-and-full-band-interactive-u-net-with-dprnn-for-demixing-cross-talk-stereo-music-2401.08678</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-band-and-full-band-interactive-u-net-with-dprnn-for-demixing-cross-talk-stereo-music-2401.08678"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-band-and-full-band-interactive-u-net-with-dprnn-for-demixing-cross-talk-stereo-music-2401.08678"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-self-supervised-learning-of-speech-representation-from-a-mutual-information-perspective-2401.08833</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-self-supervised-learning-of-speech-representation-from-a-mutual-information-perspective-2401.08833"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-self-supervised-learning-of-speech-representation-from-a-mutual-information-perspective-2401.08833"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-angular-separation-network-2401.08864</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-angular-separation-network-2401.08864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-angular-separation-network-2401.08864"/></url>
<url><loc>https://scifaro.com/en/abs/two-pass-endpoint-detection-for-speech-recognition-2401.08916</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-pass-endpoint-detection-for-speech-recognition-2401.08916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-pass-endpoint-detection-for-speech-recognition-2401.08916"/></url>
<url><loc>https://scifaro.com/en/abs/can-synthetic-data-boost-the-training-of-deep-acoustic-vehicle-counting-networks-2401.09308</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-synthetic-data-boost-the-training-of-deep-acoustic-vehicle-counting-networks-2401.09308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-synthetic-data-boost-the-training-of-deep-acoustic-vehicle-counting-networks-2401.09308"/></url>
<url><loc>https://scifaro.com/en/abs/on-speech-pre-emphasis-as-a-simple-and-inexpensive-method-to-boost-speech-enhancement-2401.09315</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-speech-pre-emphasis-as-a-simple-and-inexpensive-method-to-boost-speech-enhancement-2401.09315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-speech-pre-emphasis-as-a-simple-and-inexpensive-method-to-boost-speech-enhancement-2401.09315"/></url>
<url><loc>https://scifaro.com/en/abs/transcending-controlled-environments-assessing-the-transferability-of-asrrobust-nlu-models-to-real-world-applications-2401.09354</loc><lastmod>2024-01-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcending-controlled-environments-assessing-the-transferability-of-asrrobust-nlu-models-to-real-world-applications-2401.09354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcending-controlled-environments-assessing-the-transferability-of-asrrobust-nlu-models-to-real-world-applications-2401.09354"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-study-on-the-impact-of-positional-encoding-in-transformer-based-monaural-speech-enhancement-2401.09686</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-study-on-the-impact-of-positional-encoding-in-transformer-based-monaural-speech-enhancement-2401.09686"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-study-on-the-impact-of-positional-encoding-in-transformer-based-monaural-speech-enhancement-2401.09686"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-selection-for-analyzing-conversations-with-autism-spectrum-disorder-2401.09717</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-selection-for-analyzing-conversations-with-autism-spectrum-disorder-2401.09717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-selection-for-analyzing-conversations-with-autism-spectrum-disorder-2401.09717"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-training-for-multilingual-visual-speech-recognition-pre-training-with-discretized-visual-speech-representation-2401.09802</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-training-for-multilingual-visual-speech-recognition-pre-training-with-discretized-visual-speech-representation-2401.09802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-training-for-multilingual-visual-speech-recognition-pre-training-with-discretized-visual-speech-representation-2401.09802"/></url>
<url><loc>https://scifaro.com/en/abs/fregrad-lightweight-and-fast-frequency-aware-diffusion-vocoder-2401.10032</loc><lastmod>2024-01-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fregrad-lightweight-and-fast-frequency-aware-diffusion-vocoder-2401.10032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fregrad-lightweight-and-fast-frequency-aware-diffusion-vocoder-2401.10032"/></url>
<url><loc>https://scifaro.com/en/abs/agadir-towards-array-geometry-agnostic-directional-speech-recognition-2401.10411</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/agadir-towards-array-geometry-agnostic-directional-speech-recognition-2401.10411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/agadir-towards-array-geometry-agnostic-directional-speech-recognition-2401.10411"/></url>
<url><loc>https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-attention-based-bias-phrase-boosted-beam-search-2401.10449</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-attention-based-bias-phrase-boosted-beam-search-2401.10449"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-attention-based-bias-phrase-boosted-beam-search-2401.10449"/></url>
<url><loc>https://scifaro.com/en/abs/3d-room-geometry-inference-from-multichannel-room-impulse-response-using-deep-neural-network-2401.10453</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-room-geometry-inference-from-multichannel-room-impulse-response-using-deep-neural-network-2401.10453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-room-geometry-inference-from-multichannel-room-impulse-response-using-deep-neural-network-2401.10453"/></url>
<url><loc>https://scifaro.com/en/abs/a-two-stage-framework-in-cross-spectrum-domain-for-real-time-speech-enhancement-2401.10494</loc><lastmod>2024-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-two-stage-framework-in-cross-spectrum-domain-for-real-time-speech-enhancement-2401.10494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-two-stage-framework-in-cross-spectrum-domain-for-real-time-speech-enhancement-2401.10494"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-acoustic-word-embeddings-for-zero-resource-languages-2401.10543</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-acoustic-word-embeddings-for-zero-resource-languages-2401.10543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-acoustic-word-embeddings-for-zero-resource-languages-2401.10543"/></url>
<url><loc>https://scifaro.com/en/abs/revealing-emotional-clusters-in-speaker-embeddings-a-contrastive-learning-strategy-for-speech-emotion-recognition-2401.11017</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revealing-emotional-clusters-in-speaker-embeddings-a-contrastive-learning-strategy-for-speech-emotion-recognition-2401.11017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revealing-emotional-clusters-in-speaker-embeddings-a-contrastive-learning-strategy-for-speech-emotion-recognition-2401.11017"/></url>
<url><loc>https://scifaro.com/en/abs/streamvoice-streamable-context-aware-language-modeling-for-real-time-zero-shot-voice-conversion-2401.11053</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamvoice-streamable-context-aware-language-modeling-for-real-time-zero-shot-voice-conversion-2401.11053"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamvoice-streamable-context-aware-language-modeling-for-real-time-zero-shot-voice-conversion-2401.11053"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-bilingual-end-to-end-asr-model-using-attention-over-multiple-softmax-2401.11645</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-bilingual-end-to-end-asr-model-using-attention-over-multiple-softmax-2401.11645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-bilingual-end-to-end-asr-model-using-attention-over-multiple-softmax-2401.11645"/></url>
<url><loc>https://scifaro.com/en/abs/empowering-communication-speech-technology-for-indian-and-western-accents-through-ai-powered-speech-synthesis-2401.11771</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/empowering-communication-speech-technology-for-indian-and-western-accents-through-ai-powered-speech-synthesis-2401.11771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/empowering-communication-speech-technology-for-indian-and-western-accents-through-ai-powered-speech-synthesis-2401.11771"/></url>
<url><loc>https://scifaro.com/en/abs/harmonic-detection-from-noisy-speech-with-auditory-frame-gain-for-intelligibility-enhancement-2401.11829</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harmonic-detection-from-noisy-speech-with-auditory-frame-gain-for-intelligibility-enhancement-2401.11829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harmonic-detection-from-noisy-speech-with-auditory-frame-gain-for-intelligibility-enhancement-2401.11829"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-disturbance-sensing-level-detection-for-asd-diagnosis-and-intelligibility-enhancement-2401.11832</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-disturbance-sensing-level-detection-for-asd-diagnosis-and-intelligibility-enhancement-2401.11832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-disturbance-sensing-level-detection-for-asd-diagnosis-and-intelligibility-enhancement-2401.11832"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-speech-for-voice-privacy-protection-from-personalized-speech-generation-2401.11857</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-speech-for-voice-privacy-protection-from-personalized-speech-generation-2401.11857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-speech-for-voice-privacy-protection-from-personalized-speech-generation-2401.11857"/></url>
<url><loc>https://scifaro.com/en/abs/consistency-based-unsupervised-self-training-for-asr-personalisation-2401.12085</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/consistency-based-unsupervised-self-training-for-asr-personalisation-2401.12085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/consistency-based-unsupervised-self-training-for-asr-personalisation-2401.12085"/></url>
<url><loc>https://scifaro.com/en/abs/scoredec-a-phase-preserving-high-fidelity-audio-codec-with-a-generalized-score-based-diffusion-post-filter-2401.12160</loc><lastmod>2024-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scoredec-a-phase-preserving-high-fidelity-audio-codec-with-a-generalized-score-based-diffusion-post-filter-2401.12160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scoredec-a-phase-preserving-high-fidelity-audio-codec-with-a-generalized-score-based-diffusion-post-filter-2401.12160"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-scaper-a-library-to-simulate-and-augment-soundscapes-for-sound-event-localization-and-detection-in-realistic-rooms-2401.12238</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-scaper-a-library-to-simulate-and-augment-soundscapes-for-sound-event-localization-and-detection-in-realistic-rooms-2401.12238"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-scaper-a-library-to-simulate-and-augment-soundscapes-for-sound-event-localization-and-detection-in-realistic-rooms-2401.12238"/></url>
<url><loc>https://scifaro.com/en/abs/coavt-a-cognition-inspired-unified-audio-visual-text-pre-training-model-for-multimodal-processing-2401.12264</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coavt-a-cognition-inspired-unified-audio-visual-text-pre-training-model-for-multimodal-processing-2401.12264"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coavt-a-cognition-inspired-unified-audio-visual-text-pre-training-model-for-multimodal-processing-2401.12264"/></url>
<url><loc>https://scifaro.com/en/abs/post-training-embedding-alignment-for-decoupling-enrollment-and-runtime-speaker-recognition-models-2401.12440</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/post-training-embedding-alignment-for-decoupling-enrollment-and-runtime-speaker-recognition-models-2401.12440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/post-training-embedding-alignment-for-decoupling-enrollment-and-runtime-speaker-recognition-models-2401.12440"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-unknown-number-speaker-separation-with-transformer-decoder-based-attractor-2401.12473</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-unknown-number-speaker-separation-with-transformer-decoder-based-attractor-2401.12473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-unknown-number-speaker-separation-with-transformer-decoder-based-attractor-2401.12473"/></url>
<url><loc>https://scifaro.com/en/abs/diffmoog-a-differentiable-modular-synthesizer-for-sound-matching-2401.12570</loc><lastmod>2024-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffmoog-a-differentiable-modular-synthesizer-for-sound-matching-2401.12570"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffmoog-a-differentiable-modular-synthesizer-for-sound-matching-2401.12570"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-supervised-hierarchical-graph-clustering-for-speaker-diarization-2401.12850</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-supervised-hierarchical-graph-clustering-for-speaker-diarization-2401.12850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-supervised-hierarchical-graph-clustering-for-speaker-diarization-2401.12850"/></url>
<url><loc>https://scifaro.com/en/abs/locality-enhanced-dynamic-biasing-and-sampling-strategies-for-contextual-asr-2401.13146</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/locality-enhanced-dynamic-biasing-and-sampling-strategies-for-contextual-asr-2401.13146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/locality-enhanced-dynamic-biasing-and-sampling-strategies-for-contextual-asr-2401.13146"/></url>
<url><loc>https://scifaro.com/en/abs/mos-fad-improving-fake-audio-detection-via-automatic-mean-opinion-score-prediction-2401.13249</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mos-fad-improving-fake-audio-detection-via-automatic-mean-opinion-score-prediction-2401.13249"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mos-fad-improving-fake-audio-detection-via-automatic-mean-opinion-score-prediction-2401.13249"/></url>
<url><loc>https://scifaro.com/en/abs/scnet-sparse-compression-network-for-music-source-separation-2401.13276</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scnet-sparse-compression-network-for-music-source-separation-2401.13276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scnet-sparse-compression-network-for-music-source-separation-2401.13276"/></url>
<url><loc>https://scifaro.com/en/abs/perceptually-motivated-spatial-audio-codec-for-higher-order-ambisonics-compression-2401.13401</loc><lastmod>2024-01-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptually-motivated-spatial-audio-codec-for-higher-order-ambisonics-compression-2401.13401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptually-motivated-spatial-audio-codec-for-higher-order-ambisonics-compression-2401.13401"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-adaptive-learning-to-latent-variables-via-variational-bayes-and-maximum-a-posteriori-2401.13766</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-adaptive-learning-to-latent-variables-via-variational-bayes-and-maximum-a-posteriori-2401.13766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-adaptive-learning-to-latent-variables-via-variational-bayes-and-maximum-a-posteriori-2401.13766"/></url>
<url><loc>https://scifaro.com/en/abs/intelli-z-toward-intelligible-zero-shot-tts-2401.13921</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/intelli-z-toward-intelligible-zero-shot-tts-2401.13921"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/intelli-z-toward-intelligible-zero-shot-tts-2401.13921"/></url>
<url><loc>https://scifaro.com/en/abs/combined-generative-and-predictive-modeling-for-speech-super-resolution-2401.14269</loc><lastmod>2024-01-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combined-generative-and-predictive-modeling-for-speech-super-resolution-2401.14269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combined-generative-and-predictive-modeling-for-speech-super-resolution-2401.14269"/></url>
<url><loc>https://scifaro.com/en/abs/improving-design-of-input-condition-invariant-speech-enhancement-2401.14271</loc><lastmod>2024-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-design-of-input-condition-invariant-speech-enhancement-2401.14271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-design-of-input-condition-invariant-speech-enhancement-2401.14271"/></url>
<url><loc>https://scifaro.com/en/abs/vall-t-decoder-only-generative-transducer-for-robust-and-decoding-controllable-text-to-speech-2401.14321</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vall-t-decoder-only-generative-transducer-for-robust-and-decoding-controllable-text-to-speech-2401.14321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vall-t-decoder-only-generative-transducer-for-robust-and-decoding-controllable-text-to-speech-2401.14321"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-proximity-effect-using-broadband-signals-2401.14410</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-proximity-effect-using-broadband-signals-2401.14410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-proximity-effect-using-broadband-signals-2401.14410"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-characterization-of-speech-rhythm-going-beyond-metrics-with-recurrent-neural-networks-2401.14416</loc><lastmod>2024-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-characterization-of-speech-rhythm-going-beyond-metrics-with-recurrent-neural-networks-2401.14416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-characterization-of-speech-rhythm-going-beyond-metrics-with-recurrent-neural-networks-2401.14416"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-a-text-independent-speaker-verification-system-by-using-feature-combination-and-parallel-structure-classifiers-2401.15018</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-a-text-independent-speaker-verification-system-by-using-feature-combination-and-parallel-structure-classifiers-2401.15018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-a-text-independent-speaker-verification-system-by-using-feature-combination-and-parallel-structure-classifiers-2401.15018"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-analysis-and-synthesis-methods-subjective-and-objective-evaluations-using-various-microphone-arrays-in-the-auralization-of-a-critical-listening-room-2401.15023</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-analysis-and-synthesis-methods-subjective-and-objective-evaluations-using-various-microphone-arrays-in-the-auralization-of-a-critical-listening-room-2401.15023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-analysis-and-synthesis-methods-subjective-and-objective-evaluations-using-various-microphone-arrays-in-the-auralization-of-a-critical-listening-room-2401.15023"/></url>
<url><loc>https://scifaro.com/en/abs/on-speaker-attribution-with-surt-2401.15676</loc><lastmod>2024-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-speaker-attribution-with-surt-2401.15676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-speaker-attribution-with-surt-2401.15676"/></url>
<url><loc>https://scifaro.com/en/abs/localizing-uniformly-moving-single-frequency-sources-using-an-inverse-2-5d-approach-2401.16819</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/localizing-uniformly-moving-single-frequency-sources-using-an-inverse-2-5d-approach-2401.16819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/localizing-uniformly-moving-single-frequency-sources-using-an-inverse-2-5d-approach-2401.16819"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-temporal-activity-informed-diarization-and-separation-2401.16850</loc><lastmod>2024-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-temporal-activity-informed-diarization-and-separation-2401.16850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-temporal-activity-informed-diarization-and-separation-2401.16850"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-gamma-band-responses-to-the-speech-envelope-for-the-icassp-2024-auditory-eeg-decoding-signal-processing-grand-challenge-2401.17380</loc><lastmod>2024-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-gamma-band-responses-to-the-speech-envelope-for-the-icassp-2024-auditory-eeg-decoding-signal-processing-grand-challenge-2401.17380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-gamma-band-responses-to-the-speech-envelope-for-the-icassp-2024-auditory-eeg-decoding-signal-processing-grand-challenge-2401.17380"/></url>
<url><loc>https://scifaro.com/en/abs/enclap-combining-neural-audio-codec-and-audio-text-joint-embedding-for-automated-audio-captioning-2401.17690</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enclap-combining-neural-audio-codec-and-audio-text-joint-embedding-for-automated-audio-captioning-2401.17690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enclap-combining-neural-audio-codec-and-audio-text-joint-embedding-for-automated-audio-captioning-2401.17690"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-speech-segmentation-and-lexicon-learning-with-better-features-2401.17902</loc><lastmod>2024-02-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-speech-segmentation-and-lexicon-learning-with-better-features-2401.17902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-speech-segmentation-and-lexicon-learning-with-better-features-2401.17902"/></url>
<url><loc>https://scifaro.com/en/abs/online-speaker-diarization-of-meetings-guided-by-speech-separation-2402.00067</loc><lastmod>2024-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-speaker-diarization-of-meetings-guided-by-speech-separation-2402.00067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-speaker-diarization-of-meetings-guided-by-speech-separation-2402.00067"/></url>
<url><loc>https://scifaro.com/en/abs/pam-prompting-audio-language-models-for-audio-quality-assessment-2402.00282</loc><lastmod>2024-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pam-prompting-audio-language-models-for-audio-quality-assessment-2402.00282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pam-prompting-audio-language-models-for-audio-quality-assessment-2402.00282"/></url>
<url><loc>https://scifaro.com/en/abs/frame-wise-breath-detection-with-self-training-an-exploration-of-enhancing-breath-naturalness-in-text-to-speech-2402.00288</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frame-wise-breath-detection-with-self-training-an-exploration-of-enhancing-breath-naturalness-in-text-to-speech-2402.00288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frame-wise-breath-detection-with-self-training-an-exploration-of-enhancing-breath-naturalness-in-text-to-speech-2402.00288"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-stereo-speech-enhancement-with-spatial-cue-preservation-based-on-dual-path-structure-2402.00337</loc><lastmod>2024-02-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-stereo-speech-enhancement-with-spatial-cue-preservation-based-on-dual-path-structure-2402.00337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-stereo-speech-enhancement-with-spatial-cue-preservation-based-on-dual-path-structure-2402.00337"/></url>
<url><loc>https://scifaro.com/en/abs/an-analysis-of-the-variance-of-diffusion-based-speech-enhancement-2402.00811</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-analysis-of-the-variance-of-diffusion-based-speech-enhancement-2402.00811"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-analysis-of-the-variance-of-diffusion-based-speech-enhancement-2402.00811"/></url>
<url><loc>https://scifaro.com/en/abs/usdnet-unsupervised-speech-dereverberation-via-neural-forward-filtering-2402.00820</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usdnet-unsupervised-speech-dereverberation-via-neural-forward-filtering-2402.00820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usdnet-unsupervised-speech-dereverberation-via-neural-forward-filtering-2402.00820"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-fine-tuning-of-audio-spectrogram-transformers-via-soft-mixture-of-adapters-2402.00828</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-fine-tuning-of-audio-spectrogram-transformers-via-soft-mixture-of-adapters-2402.00828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-fine-tuning-of-audio-spectrogram-transformers-via-soft-mixture-of-adapters-2402.00828"/></url>
<url><loc>https://scifaro.com/en/abs/deep-room-impulse-response-completion-2402.00859</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-room-impulse-response-completion-2402.00859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-room-impulse-response-completion-2402.00859"/></url>
<url><loc>https://scifaro.com/en/abs/an-intra-brnn-and-gb-rvq-based-end-to-end-neural-audio-codec-2402.01271</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-intra-brnn-and-gb-rvq-based-end-to-end-neural-audio-codec-2402.01271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-intra-brnn-and-gb-rvq-based-end-to-end-neural-audio-codec-2402.01271"/></url>
<url><loc>https://scifaro.com/en/abs/learning-semantic-information-from-raw-audio-signal-using-both-contextual-and-phonetic-representations-2402.01298</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-semantic-information-from-raw-audio-signal-using-both-contextual-and-phonetic-representations-2402.01298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-semantic-information-from-raw-audio-signal-using-both-contextual-and-phonetic-representations-2402.01298"/></url>
<url><loc>https://scifaro.com/en/abs/del-visual-al-auditivo-sonorizaci-on-de-escenas-guiada-por-imagen-2402.01385</loc><lastmod>2024-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/del-visual-al-auditivo-sonorizaci-on-de-escenas-guiada-por-imagen-2402.01385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/del-visual-al-auditivo-sonorizaci-on-de-escenas-guiada-por-imagen-2402.01385"/></url>
<url><loc>https://scifaro.com/en/abs/are-paralinguistic-representations-all-that-is-needed-for-speech-emotion-recognition-2402.01579</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-paralinguistic-representations-all-that-is-needed-for-speech-emotion-recognition-2402.01579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-paralinguistic-representations-all-that-is-needed-for-speech-emotion-recognition-2402.01579"/></url>
<url><loc>https://scifaro.com/en/abs/bat-learning-to-reason-about-spatial-sounds-with-large-language-models-2402.01591</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bat-learning-to-reason-about-spatial-sounds-with-large-language-models-2402.01591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bat-learning-to-reason-about-spatial-sounds-with-large-language-models-2402.01591"/></url>
<url><loc>https://scifaro.com/en/abs/identifying-false-content-and-hate-speech-in-sinhala-youtube-videos-by-analyzing-the-audio-2402.01752</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identifying-false-content-and-hate-speech-in-sinhala-youtube-videos-by-analyzing-the-audio-2402.01752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identifying-false-content-and-hate-speech-in-sinhala-youtube-videos-by-analyzing-the-audio-2402.01752"/></url>
<url><loc>https://scifaro.com/en/abs/introduction-to-speech-recognition-2402.01778</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introduction-to-speech-recognition-2402.01778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introduction-to-speech-recognition-2402.01778"/></url>
<url><loc>https://scifaro.com/en/abs/speech-foundation-models-in-healthcare-effect-of-layer-selection-on-pathological-speech-feature-prediction-2402.01796</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-foundation-models-in-healthcare-effect-of-layer-selection-on-pathological-speech-feature-prediction-2402.01796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-foundation-models-in-healthcare-effect-of-layer-selection-on-pathological-speech-feature-prediction-2402.01796"/></url>
<url><loc>https://scifaro.com/en/abs/tomobrush-exploring-dental-health-sensing-using-a-sonic-toothbrush-2402.01933</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tomobrush-exploring-dental-health-sensing-using-a-sonic-toothbrush-2402.01933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tomobrush-exploring-dental-health-sensing-using-a-sonic-toothbrush-2402.01933"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-positive-transfer-for-improved-low-resource-speech-recognition-using-acoustic-pseudo-tokens-2402.02302</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-positive-transfer-for-improved-low-resource-speech-recognition-using-acoustic-pseudo-tokens-2402.02302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-positive-transfer-for-improved-low-resource-speech-recognition-using-acoustic-pseudo-tokens-2402.02302"/></url>
<url><loc>https://scifaro.com/en/abs/description-on-ieee-icme-2024-grand-challenge-semi-supervised-acoustic-scene-classification-under-domain-shift-2402.02694</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-on-ieee-icme-2024-grand-challenge-semi-supervised-acoustic-scene-classification-under-domain-shift-2402.02694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-on-ieee-icme-2024-grand-challenge-semi-supervised-acoustic-scene-classification-under-domain-shift-2402.02694"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-of-depression-in-speech-using-ensemble-convolutional-neural-networks-2402.02830</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-of-depression-in-speech-using-ensemble-convolutional-neural-networks-2402.02830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-of-depression-in-speech-using-ensemble-convolutional-neural-networks-2402.02830"/></url>
<url><loc>https://scifaro.com/en/abs/an-attention-long-short-term-memory-based-system-for-automatic-classification-of-speech-intelligibility-2402.02850</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-attention-long-short-term-memory-based-system-for-automatic-classification-of-speech-intelligibility-2402.02850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-attention-long-short-term-memory-based-system-for-automatic-classification-of-speech-intelligibility-2402.02850"/></url>
<url><loc>https://scifaro.com/en/abs/on-combining-acoustic-and-modulation-spectrograms-in-an-attention-lstm-based-system-for-speech-intelligibility-level-classification-2402.02865</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-combining-acoustic-and-modulation-spectrograms-in-an-attention-lstm-based-system-for-speech-intelligibility-level-classification-2402.02865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-combining-acoustic-and-modulation-spectrograms-in-an-attention-lstm-based-system-for-speech-intelligibility-level-classification-2402.02865"/></url>
<url><loc>https://scifaro.com/en/abs/positive-and-negative-sampling-strategies-for-self-supervised-learning-on-audio-video-data-2402.02899</loc><lastmod>2024-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/positive-and-negative-sampling-strategies-for-self-supervised-learning-on-audio-video-data-2402.02899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/positive-and-negative-sampling-strategies-for-self-supervised-learning-on-audio-video-data-2402.02899"/></url>
<url><loc>https://scifaro.com/en/abs/array-geometry-robust-attention-based-neural-beamformer-for-moving-speakers-2402.03058</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/array-geometry-robust-attention-based-neural-beamformer-for-moving-speakers-2402.03058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/array-geometry-robust-attention-based-neural-beamformer-for-moving-speakers-2402.03058"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-google-s-voice-recognition-and-sentence-classification-for-health-care-applications-2402.03369</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-google-s-voice-recognition-and-sentence-classification-for-health-care-applications-2402.03369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-google-s-voice-recognition-and-sentence-classification-for-health-care-applications-2402.03369"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-the-stability-of-llm-based-speech-generation-systems-through-self-supervised-representations-2402.03407</loc><lastmod>2024-02-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-the-stability-of-llm-based-speech-generation-systems-through-self-supervised-representations-2402.03407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-the-stability-of-llm-based-speech-generation-systems-through-self-supervised-representations-2402.03407"/></url>
<url><loc>https://scifaro.com/en/abs/listen-chat-and-remix-text-guided-soundscape-remixing-for-enhanced-auditory-experience-2402.03710</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-chat-and-remix-text-guided-soundscape-remixing-for-enhanced-auditory-experience-2402.03710"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-chat-and-remix-text-guided-soundscape-remixing-for-enhanced-auditory-experience-2402.03710"/></url>
<url><loc>https://scifaro.com/en/abs/reborn-reinforcement-learned-boundary-segmentation-with-iterative-training-for-unsupervised-asr-2402.03988</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reborn-reinforcement-learned-boundary-segmentation-with-iterative-training-for-unsupervised-asr-2402.03988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reborn-reinforcement-learned-boundary-segmentation-with-iterative-training-for-unsupervised-asr-2402.03988"/></url>
<url><loc>https://scifaro.com/en/abs/large-vocabulary-spontaneous-speech-recognition-for-tigrigna-2402.04254</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-vocabulary-spontaneous-speech-recognition-for-tigrigna-2402.04254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-vocabulary-spontaneous-speech-recognition-for-tigrigna-2402.04254"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-unsupervised-domain-adaptation-for-asr-using-ensemble-models-and-multi-stage-training-2402.04805</loc><lastmod>2024-02-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-unsupervised-domain-adaptation-for-asr-using-ensemble-models-and-multi-stage-training-2402.04805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-unsupervised-domain-adaptation-for-asr-using-ensemble-models-and-multi-stage-training-2402.04805"/></url>
<url><loc>https://scifaro.com/en/abs/room-transfer-function-reconstruction-using-complex-valued-neural-networks-and-irregularly-distributed-microphones-2402.04866</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-transfer-function-reconstruction-using-complex-valued-neural-networks-and-irregularly-distributed-microphones-2402.04866"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-transfer-function-reconstruction-using-complex-valued-neural-networks-and-irregularly-distributed-microphones-2402.04866"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-self-supervised-speech-model-with-pseudo-word-level-targets-from-visually-grounded-speech-model-2402.05819</loc><lastmod>2024-02-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-self-supervised-speech-model-with-pseudo-word-level-targets-from-visually-grounded-speech-model-2402.05819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-self-supervised-speech-model-with-pseudo-word-level-targets-from-visually-grounded-speech-model-2402.05819"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-joint-detection-and-localization-of-acoustic-reflectors-2402.06246</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-joint-detection-and-localization-of-acoustic-reflectors-2402.06246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-joint-detection-and-localization-of-acoustic-reflectors-2402.06246"/></url>
<url><loc>https://scifaro.com/en/abs/a-transversal-study-of-fundamental-frequency-contours-in-parkinsonian-voices-2402.06387</loc><lastmod>2024-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-transversal-study-of-fundamental-frequency-contours-in-parkinsonian-voices-2402.06387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-transversal-study-of-fundamental-frequency-contours-in-parkinsonian-voices-2402.06387"/></url>
<url><loc>https://scifaro.com/en/abs/sound-source-separation-using-latent-variational-block-wise-disentanglement-2402.06683</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-source-separation-using-latent-variational-block-wise-disentanglement-2402.06683"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-source-separation-using-latent-variational-block-wise-disentanglement-2402.06683"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-self-supervised-speech-models-on-children-s-speech-and-infant-vocalizations-2402.06888</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-self-supervised-speech-models-on-children-s-speech-and-infant-vocalizations-2402.06888"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-self-supervised-speech-models-on-children-s-speech-and-infant-vocalizations-2402.06888"/></url>
<url><loc>https://scifaro.com/en/abs/cochceps-augment-a-novel-self-supervised-contrastive-learning-using-cochlear-cepstrum-based-masking-for-speech-emotion-recognition-2402.06923</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cochceps-augment-a-novel-self-supervised-contrastive-learning-using-cochlear-cepstrum-based-masking-for-speech-emotion-recognition-2402.06923"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cochceps-augment-a-novel-self-supervised-contrastive-learning-using-cochlear-cepstrum-based-masking-for-speech-emotion-recognition-2402.06923"/></url>
<url><loc>https://scifaro.com/en/abs/making-flow-matching-based-zero-shot-text-to-speech-laugh-as-you-like-2402.07383</loc><lastmod>2024-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/making-flow-matching-based-zero-shot-text-to-speech-laugh-as-you-like-2402.07383"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/making-flow-matching-based-zero-shot-text-to-speech-laugh-as-you-like-2402.07383"/></url>
<url><loc>https://scifaro.com/en/abs/interactive-singing-melody-extraction-based-on-active-adaptation-2402.07599</loc><lastmod>2024-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interactive-singing-melody-extraction-based-on-active-adaptation-2402.07599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interactive-singing-melody-extraction-based-on-active-adaptation-2402.07599"/></url>
<url><loc>https://scifaro.com/en/abs/air-bench-benchmarking-large-audio-language-models-via-generative-comprehension-2402.07729</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/air-bench-benchmarking-large-audio-language-models-via-generative-comprehension-2402.07729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/air-bench-benchmarking-large-audio-language-models-via-generative-comprehension-2402.07729"/></url>
<url><loc>https://scifaro.com/en/abs/unrestricted-global-phase-bias-aware-single-channel-speech-enhancement-with-conformer-based-metric-gan-2402.08252</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unrestricted-global-phase-bias-aware-single-channel-speech-enhancement-with-conformer-based-metric-gan-2402.08252"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unrestricted-global-phase-bias-aware-single-channel-speech-enhancement-with-conformer-based-metric-gan-2402.08252"/></url>
<url><loc>https://scifaro.com/en/abs/channel-combination-algorithms-for-robust-distant-voice-activity-and-overlapped-speech-detection-2402.08312</loc><lastmod>2024-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-combination-algorithms-for-robust-distant-voice-activity-and-overlapped-speech-detection-2402.08312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-combination-algorithms-for-robust-distant-voice-activity-and-overlapped-speech-detection-2402.08312"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-cough-sounds-to-optimize-chest-x-ray-usage-in-low-resource-settings-2402.08789</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-cough-sounds-to-optimize-chest-x-ray-usage-in-low-resource-settings-2402.08789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-cough-sounds-to-optimize-chest-x-ray-usage-in-low-resource-settings-2402.08789"/></url>
<url><loc>https://scifaro.com/en/abs/unienc-cassnat-an-encoder-only-non-autoregressive-asr-for-speech-ssl-models-2402.08898</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unienc-cassnat-an-encoder-only-non-autoregressive-asr-for-speech-ssl-models-2402.08898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unienc-cassnat-an-encoder-only-non-autoregressive-asr-for-speech-ssl-models-2402.08898"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-reconstruction-using-a-compact-acoustics-informed-neural-network-2402.08904</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-reconstruction-using-a-compact-acoustics-informed-neural-network-2402.08904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-reconstruction-using-a-compact-acoustics-informed-neural-network-2402.08904"/></url>
<url><loc>https://scifaro.com/en/abs/listening-to-multi-talker-conversations-modular-and-end-to-end-perspectives-2402.08932</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listening-to-multi-talker-conversations-modular-and-end-to-end-perspectives-2402.08932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listening-to-multi-talker-conversations-modular-and-end-to-end-perspectives-2402.08932"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-the-l3das23-challenge-on-audio-visual-extended-reality-2402.09245</loc><lastmod>2024-02-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-the-l3das23-challenge-on-audio-visual-extended-reality-2402.09245"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-the-l3das23-challenge-on-audio-visual-extended-reality-2402.09245"/></url>
<url><loc>https://scifaro.com/en/abs/mixture-to-mixture-leveraging-close-talk-mixtures-as-weak-supervision-for-speech-separation-2402.09313</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mixture-to-mixture-leveraging-close-talk-mixtures-as-weak-supervision-for-speech-separation-2402.09313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mixture-to-mixture-leveraging-close-talk-mixtures-as-weak-supervision-for-speech-separation-2402.09313"/></url>
<url><loc>https://scifaro.com/en/abs/mobilespeech-a-fast-and-high-fidelity-framework-for-mobile-zero-shot-text-to-speech-2402.09378</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mobilespeech-a-fast-and-high-fidelity-framework-for-mobile-zero-shot-text-to-speech-2402.09378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mobilespeech-a-fast-and-high-fidelity-framework-for-mobile-zero-shot-text-to-speech-2402.09378"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-models-for-audio-restoration-2402.09821</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-models-for-audio-restoration-2402.09821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-models-for-audio-restoration-2402.09821"/></url>
<url><loc>https://scifaro.com/en/abs/speaking-in-wavelet-domain-a-simple-and-efficient-approach-to-speed-up-speech-diffusion-model-2402.10642</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaking-in-wavelet-domain-a-simple-and-efficient-approach-to-speed-up-speech-diffusion-model-2402.10642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaking-in-wavelet-domain-a-simple-and-efficient-approach-to-speed-up-speech-diffusion-model-2402.10642"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-tiny-colorless-feedback-delay-networks-2402.11216</loc><lastmod>2025-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-tiny-colorless-feedback-delay-networks-2402.11216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-tiny-colorless-feedback-delay-networks-2402.11216"/></url>
<url><loc>https://scifaro.com/en/abs/diffuse-sound-field-synthesis-2402.11330</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffuse-sound-field-synthesis-2402.11330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffuse-sound-field-synthesis-2402.11330"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-finetuning-for-speech-emotion-recognition-and-domain-adaptation-2402.11747</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-finetuning-for-speech-emotion-recognition-and-domain-adaptation-2402.11747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-finetuning-for-speech-emotion-recognition-and-domain-adaptation-2402.11747"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relationship-between-speech-and-hearing-2402.12094</loc><lastmod>2024-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relationship-between-speech-and-hearing-2402.12094"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relationship-between-speech-and-hearing-2402.12094"/></url>
<url><loc>https://scifaro.com/en/abs/language-codec-bridging-discrete-codec-representations-and-speech-language-models-2402.12208</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-codec-bridging-discrete-codec-representations-and-speech-language-models-2402.12208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-codec-bridging-discrete-codec-representations-and-speech-language-models-2402.12208"/></url>
<url><loc>https://scifaro.com/en/abs/bayesian-parameter-efficient-fine-tuning-for-overcoming-catastrophic-forgetting-2402.12220</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bayesian-parameter-efficient-fine-tuning-for-overcoming-catastrophic-forgetting-2402.12220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bayesian-parameter-efficient-fine-tuning-for-overcoming-catastrophic-forgetting-2402.12220"/></url>
<url><loc>https://scifaro.com/en/abs/plugin-speech-enhancement-a-universal-speech-enhancement-framework-inspired-by-dynamic-neural-network-2402.12746</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/plugin-speech-enhancement-a-universal-speech-enhancement-framework-inspired-by-dynamic-neural-network-2402.12746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/plugin-speech-enhancement-a-universal-speech-enhancement-framework-inspired-by-dynamic-neural-network-2402.12746"/></url>
<url><loc>https://scifaro.com/en/abs/emo-superb-an-in-depth-look-at-speech-emotion-recognition-2402.13018</loc><lastmod>2024-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-superb-an-in-depth-look-at-speech-emotion-recognition-2402.13018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-superb-an-in-depth-look-at-speech-emotion-recognition-2402.13018"/></url>
<url><loc>https://scifaro.com/en/abs/codec-superb-an-in-depth-analysis-of-sound-codec-models-2402.13071</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codec-superb-an-in-depth-analysis-of-sound-codec-models-2402.13071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codec-superb-an-in-depth-analysis-of-sound-codec-models-2402.13071"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-self-supervised-learning-models-2402.13199</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-self-supervised-learning-models-2402.13199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-extraction-with-pre-trained-self-supervised-learning-models-2402.13199"/></url>
<url><loc>https://scifaro.com/en/abs/probing-self-supervised-learning-models-with-target-speech-extraction-2402.13200</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-self-supervised-learning-models-with-target-speech-extraction-2402.13200"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-self-supervised-learning-models-with-target-speech-extraction-2402.13200"/></url>
<url><loc>https://scifaro.com/en/abs/towards-audio-language-modeling-an-overview-2402.13236</loc><lastmod>2024-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-audio-language-modeling-an-overview-2402.13236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-audio-language-modeling-an-overview-2402.13236"/></url>
<url><loc>https://scifaro.com/en/abs/when-llms-meets-acoustic-landmarks-an-efficient-approach-to-integrate-speech-into-large-language-models-for-depression-detection-2402.13276</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/when-llms-meets-acoustic-landmarks-an-efficient-approach-to-integrate-speech-into-large-language-models-for-depression-detection-2402.13276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/when-llms-meets-acoustic-landmarks-an-efficient-approach-to-integrate-speech-into-large-language-models-for-depression-detection-2402.13276"/></url>
<url><loc>https://scifaro.com/en/abs/mel-fullsubnet-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2402.13511</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-fullsubnet-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2402.13511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-fullsubnet-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2402.13511"/></url>
<url><loc>https://scifaro.com/en/abs/homula-rir-a-room-impulse-response-dataset-for-teleconferencing-and-spatial-audio-applications-acquired-through-higher-order-microphones-and-uniform-linear-microphone-arrays-2402.13896</loc><lastmod>2024-02-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/homula-rir-a-room-impulse-response-dataset-for-teleconferencing-and-spatial-audio-applications-acquired-through-higher-order-microphones-and-uniform-linear-microphone-arrays-2402.13896"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/homula-rir-a-room-impulse-response-dataset-for-teleconferencing-and-spatial-audio-applications-acquired-through-higher-order-microphones-and-uniform-linear-microphone-arrays-2402.13896"/></url>
<url><loc>https://scifaro.com/en/abs/sicrn-advancing-speech-enhancement-through-state-space-model-and-inplace-convolution-techniques-2402.14225</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sicrn-advancing-speech-enhancement-through-state-space-model-and-inplace-convolution-techniques-2402.14225"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sicrn-advancing-speech-enhancement-through-state-space-model-and-inplace-convolution-techniques-2402.14225"/></url>
<url><loc>https://scifaro.com/en/abs/periodgrad-towards-pitch-controllable-neural-vocoder-based-on-a-diffusion-probabilistic-model-2402.14692</loc><lastmod>2024-02-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/periodgrad-towards-pitch-controllable-neural-vocoder-based-on-a-diffusion-probabilistic-model-2402.14692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/periodgrad-towards-pitch-controllable-neural-vocoder-based-on-a-diffusion-probabilistic-model-2402.14692"/></url>
<url><loc>https://scifaro.com/en/abs/childaugment-data-augmentation-methods-for-zero-resource-children-s-speaker-verification-2402.15214</loc><lastmod>2024-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/childaugment-data-augmentation-methods-for-zero-resource-children-s-speaker-verification-2402.15214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/childaugment-data-augmentation-methods-for-zero-resource-children-s-speaker-verification-2402.15214"/></url>
<url><loc>https://scifaro.com/en/abs/high-resolution-guitar-transcription-via-domain-adaptation-2402.15258</loc><lastmod>2024-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-resolution-guitar-transcription-via-domain-adaptation-2402.15258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-resolution-guitar-transcription-via-domain-adaptation-2402.15258"/></url>
<url><loc>https://scifaro.com/en/abs/speech-corpus-for-korean-children-with-autism-spectrum-disorder-towards-automatic-assessment-systems-2402.15539</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-corpus-for-korean-children-with-autism-spectrum-disorder-towards-automatic-assessment-systems-2402.15539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-corpus-for-korean-children-with-autism-spectrum-disorder-towards-automatic-assessment-systems-2402.15539"/></url>
<url><loc>https://scifaro.com/en/abs/toward-fully-self-supervised-multi-pitch-estimation-2402.15569</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-fully-self-supervised-multi-pitch-estimation-2402.15569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-fully-self-supervised-multi-pitch-estimation-2402.15569"/></url>
<url><loc>https://scifaro.com/en/abs/text-guided-hubert-self-supervised-speech-pre-training-via-generative-adversarial-networks-2402.15725</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-guided-hubert-self-supervised-speech-pre-training-via-generative-adversarial-networks-2402.15725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-guided-hubert-self-supervised-speech-pre-training-via-generative-adversarial-networks-2402.15725"/></url>
<url><loc>https://scifaro.com/en/abs/a-circular-microphone-array-with-virtual-microphones-based-on-acoustics-informed-neural-networks-2402.15735</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-circular-microphone-array-with-virtual-microphones-based-on-acoustics-informed-neural-networks-2402.15735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-circular-microphone-array-with-virtual-microphones-based-on-acoustics-informed-neural-networks-2402.15735"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-power-of-pure-attention-mechanisms-in-blind-room-parameter-estimation-2402.16003</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-power-of-pure-attention-mechanisms-in-blind-room-parameter-estimation-2402.16003"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-power-of-pure-attention-mechanisms-in-blind-room-parameter-estimation-2402.16003"/></url>
<url><loc>https://scifaro.com/en/abs/an-automated-end-to-end-open-source-software-for-high-quality-text-to-speech-dataset-generation-2402.16380</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-automated-end-to-end-open-source-software-for-high-quality-text-to-speech-dataset-generation-2402.16380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-automated-end-to-end-open-source-software-for-high-quality-text-to-speech-dataset-generation-2402.16380"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-speech-enhancement-in-noisy-environments-via-emotion-based-contextual-cues-2402.16394</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-in-noisy-environments-via-emotion-based-contextual-cues-2402.16394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-speech-enhancement-in-noisy-environments-via-emotion-based-contextual-cues-2402.16394"/></url>
<url><loc>https://scifaro.com/en/abs/skill-similarity-aware-knowledge-distillation-for-speech-self-supervised-learning-2402.16830</loc><lastmod>2024-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/skill-similarity-aware-knowledge-distillation-for-speech-self-supervised-learning-2402.16830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/skill-similarity-aware-knowledge-distillation-for-speech-self-supervised-learning-2402.16830"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-extraction-by-directly-exploiting-contextual-information-in-the-time-frequency-domain-2402.17146</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-extraction-by-directly-exploiting-contextual-information-in-the-time-frequency-domain-2402.17146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-extraction-by-directly-exploiting-contextual-information-in-the-time-frequency-domain-2402.17146"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonics-encoding-for-arbitrary-microphone-arrays-incorporating-residual-channels-for-binaural-reproduction-2402.17362</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonics-encoding-for-arbitrary-microphone-arrays-incorporating-residual-channels-for-binaural-reproduction-2402.17362"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonics-encoding-for-arbitrary-microphone-arrays-incorporating-residual-channels-for-binaural-reproduction-2402.17362"/></url>
<url><loc>https://scifaro.com/en/abs/clapsep-leveraging-contrastive-pre-trained-model-for-multi-modal-query-conditioned-target-sound-extraction-2402.17455</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clapsep-leveraging-contrastive-pre-trained-model-for-multi-modal-query-conditioned-target-sound-extraction-2402.17455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clapsep-leveraging-contrastive-pre-trained-model-for-multi-modal-query-conditioned-target-sound-extraction-2402.17455"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-low-latency-music-source-separation-using-hybrid-spectrogram-tasnet-2402.17701</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-low-latency-music-source-separation-using-hybrid-spectrogram-tasnet-2402.17701"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-low-latency-music-source-separation-using-hybrid-spectrogram-tasnet-2402.17701"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-neural-phonetic-posteriorgrams-2402.17735</loc><lastmod>2024-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-neural-phonetic-posteriorgrams-2402.17735"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-neural-phonetic-posteriorgrams-2402.17735"/></url>
<url><loc>https://scifaro.com/en/abs/niirf-neural-iir-filter-field-for-hrtf-upsampling-and-personalization-2402.17907</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/niirf-neural-iir-filter-field-for-hrtf-upsampling-and-personalization-2402.17907"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/niirf-neural-iir-filter-field-for-hrtf-upsampling-and-personalization-2402.17907"/></url>
<url><loc>https://scifaro.com/en/abs/why-does-music-source-separation-benefit-from-cacophony-2402.18407</loc><lastmod>2024-02-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-does-music-source-separation-benefit-from-cacophony-2402.18407"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-does-music-source-separation-benefit-from-cacophony-2402.18407"/></url>
<url><loc>https://scifaro.com/en/abs/extending-multilingual-speech-synthesis-to-100-languages-without-transcribed-data-2402.18932</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extending-multilingual-speech-synthesis-to-100-languages-without-transcribed-data-2402.18932"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extending-multilingual-speech-synthesis-to-100-languages-without-transcribed-data-2402.18932"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonics-networks-the-effect-of-radial-functions-regularization-2402.18968</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonics-networks-the-effect-of-radial-functions-regularization-2402.18968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonics-networks-the-effect-of-radial-functions-regularization-2402.18968"/></url>
<url><loc>https://scifaro.com/en/abs/a-sound-approach-using-large-language-models-to-generate-audio-descriptions-for-egocentric-text-audio-retrieval-2402.19106</loc><lastmod>2024-03-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-sound-approach-using-large-language-models-to-generate-audio-descriptions-for-egocentric-text-audio-retrieval-2402.19106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-sound-approach-using-large-language-models-to-generate-audio-descriptions-for-egocentric-text-audio-retrieval-2402.19106"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-adapter-tuning-of-pre-trained-speech-models-for-automatic-speaker-verification-2403.00293</loc><lastmod>2024-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-adapter-tuning-of-pre-trained-speech-models-for-automatic-speaker-verification-2403.00293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-adapter-tuning-of-pre-trained-speech-models-for-automatic-speaker-verification-2403.00293"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-frequency-bands-on-acoustic-anomaly-detection-of-machines-using-deep-learning-based-model-2403.00379</loc><lastmod>2024-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-frequency-bands-on-acoustic-anomaly-detection-of-machines-using-deep-learning-based-model-2403.00379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-frequency-bands-on-acoustic-anomaly-detection-of-machines-using-deep-learning-based-model-2403.00379"/></url>
<url><loc>https://scifaro.com/en/abs/segaa-a-unified-approach-to-predicting-age-gender-and-emotion-in-speech-2403.00887</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/segaa-a-unified-approach-to-predicting-age-gender-and-emotion-in-speech-2403.00887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/segaa-a-unified-approach-to-predicting-age-gender-and-emotion-in-speech-2403.00887"/></url>
<url><loc>https://scifaro.com/en/abs/advanced-signal-analysis-in-detecting-replay-attacks-for-automatic-speaker-verification-systems-2403.01130</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advanced-signal-analysis-in-detecting-replay-attacks-for-automatic-speaker-verification-systems-2403.01130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advanced-signal-analysis-in-detecting-replay-attacks-for-automatic-speaker-verification-systems-2403.01130"/></url>
<url><loc>https://scifaro.com/en/abs/a-dcf-an-architecture-agnostic-metric-with-application-to-spoofing-robust-speaker-verification-2403.01355</loc><lastmod>2025-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dcf-an-architecture-agnostic-metric-with-application-to-spoofing-robust-speaker-verification-2403.01355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dcf-an-architecture-agnostic-metric-with-application-to-spoofing-robust-speaker-verification-2403.01355"/></url>
<url><loc>https://scifaro.com/en/abs/a-closer-look-at-wav2vec2-embeddings-for-on-device-single-channel-speech-enhancement-2403.01369</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-closer-look-at-wav2vec2-embeddings-for-on-device-single-channel-speech-enhancement-2403.01369"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-closer-look-at-wav2vec2-embeddings-for-on-device-single-channel-speech-enhancement-2403.01369"/></url>
<url><loc>https://scifaro.com/en/abs/pavits-exploring-prosody-aware-vits-for-end-to-end-emotional-voice-conversion-2403.01494</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pavits-exploring-prosody-aware-vits-for-end-to-end-emotional-voice-conversion-2403.01494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pavits-exploring-prosody-aware-vits-for-end-to-end-emotional-voice-conversion-2403.01494"/></url>
<url><loc>https://scifaro.com/en/abs/6dof-seld-sound-event-localization-and-detection-using-microphones-and-motion-tracking-sensors-on-self-motioning-human-2403.01670</loc><lastmod>2024-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/6dof-seld-sound-event-localization-and-detection-using-microphones-and-motion-tracking-sensors-on-self-motioning-human-2403.01670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/6dof-seld-sound-event-localization-and-detection-using-microphones-and-motion-tracking-sensors-on-self-motioning-human-2403.01670"/></url>
<url><loc>https://scifaro.com/en/abs/emovome-a-dataset-for-emotion-recognition-in-spontaneous-real-life-speech-2403.02167</loc><lastmod>2024-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emovome-a-dataset-for-emotion-recognition-in-spontaneous-real-life-speech-2403.02167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emovome-a-dataset-for-emotion-recognition-in-spontaneous-real-life-speech-2403.02167"/></url>
<url><loc>https://scifaro.com/en/abs/pixit-joint-training-of-speaker-diarization-and-speech-separation-from-real-world-multi-speaker-recordings-2403.02288</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pixit-joint-training-of-speaker-diarization-and-speech-separation-from-real-world-multi-speaker-recordings-2403.02288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pixit-joint-training-of-speaker-diarization-and-speech-separation-from-real-world-multi-speaker-recordings-2403.02288"/></url>
<url><loc>https://scifaro.com/en/abs/neurovoz-a-castillian-spanish-corpus-of-parkinsonian-speech-2403.02371</loc><lastmod>2025-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neurovoz-a-castillian-spanish-corpus-of-parkinsonian-speech-2403.02371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neurovoz-a-castillian-spanish-corpus-of-parkinsonian-speech-2403.02371"/></url>
<url><loc>https://scifaro.com/en/abs/naturalspeech-3-zero-shot-speech-synthesis-with-factorized-codec-and-diffusion-models-2403.03100</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/naturalspeech-3-zero-shot-speech-synthesis-with-factorized-codec-and-diffusion-models-2403.03100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/naturalspeech-3-zero-shot-speech-synthesis-with-factorized-codec-and-diffusion-models-2403.03100"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-performance-of-spectrogram-and-scalogram-as-input-of-acoustic-recognition-task-2403.03611</loc><lastmod>2025-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-performance-of-spectrogram-and-scalogram-as-input-of-acoustic-recognition-task-2403.03611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-performance-of-spectrogram-and-scalogram-as-input-of-acoustic-recognition-task-2403.03611"/></url>
<url><loc>https://scifaro.com/en/abs/tweaking-autoregressive-methods-for-inpainting-of-gaps-in-audio-signals-2403.04433</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tweaking-autoregressive-methods-for-inpainting-of-gaps-in-audio-signals-2403.04433"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tweaking-autoregressive-methods-for-inpainting-of-gaps-in-audio-signals-2403.04433"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-via-cnn-transformer-and-multidimensional-attention-mechanism-2403.04743</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-via-cnn-transformer-and-multidimensional-attention-mechanism-2403.04743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-via-cnn-transformer-and-multidimensional-attention-mechanism-2403.04743"/></url>
<url><loc>https://scifaro.com/en/abs/un-paired-signal-to-signal-translation-with-1d-conditional-gans-2403.04800</loc><lastmod>2024-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/un-paired-signal-to-signal-translation-with-1d-conditional-gans-2403.04800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/un-paired-signal-to-signal-translation-with-1d-conditional-gans-2403.04800"/></url>
<url><loc>https://scifaro.com/en/abs/attentionstitch-how-attention-solves-the-speech-editing-problem-2403.04804</loc><lastmod>2024-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentionstitch-how-attention-solves-the-speech-editing-problem-2403.04804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentionstitch-how-attention-solves-the-speech-editing-problem-2403.04804"/></url>
<url><loc>https://scifaro.com/en/abs/robust-semantic-communications-for-speech-transmission-2403.05187</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-semantic-communications-for-speech-transmission-2403.05187"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-semantic-communications-for-speech-transmission-2403.05187"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-speech-enhancement-using-deep-complex-convolutional-transformer-networks-2403.05393</loc><lastmod>2024-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-speech-enhancement-using-deep-complex-convolutional-transformer-networks-2403.05393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-speech-enhancement-using-deep-complex-convolutional-transformer-networks-2403.05393"/></url>
<url><loc>https://scifaro.com/en/abs/asynchronous-microphone-array-calibration-using-hybrid-tdoa-information-2403.05791</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asynchronous-microphone-array-calibration-using-hybrid-tdoa-information-2403.05791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asynchronous-microphone-array-calibration-using-hybrid-tdoa-information-2403.05791"/></url>
<url><loc>https://scifaro.com/en/abs/aligning-speech-to-languages-to-enhance-code-switching-speech-recognition-2403.05887</loc><lastmod>2025-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aligning-speech-to-languages-to-enhance-code-switching-speech-recognition-2403.05887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aligning-speech-to-languages-to-enhance-code-switching-speech-recognition-2403.05887"/></url>
<url><loc>https://scifaro.com/en/abs/sonotracelab-a-raytracing-based-acoustic-modelling-system-for-simulating-echolocation-behavior-of-bats-2403.06847</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sonotracelab-a-raytracing-based-acoustic-modelling-system-for-simulating-echolocation-behavior-of-bats-2403.06847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sonotracelab-a-raytracing-based-acoustic-modelling-system-for-simulating-echolocation-behavior-of-bats-2403.06847"/></url>
<url><loc>https://scifaro.com/en/abs/concurrent-speaker-detection-a-multi-microphone-transformer-based-approach-2403.06856</loc><lastmod>2024-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concurrent-speaker-detection-a-multi-microphone-transformer-based-approach-2403.06856"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concurrent-speaker-detection-a-multi-microphone-transformer-based-approach-2403.06856"/></url>
<url><loc>https://scifaro.com/en/abs/on-hrtf-notch-frequency-prediction-using-anthropometric-features-and-neural-networks-2403.07579</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-hrtf-notch-frequency-prediction-using-anthropometric-features-and-neural-networks-2403.07579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-hrtf-notch-frequency-prediction-using-anthropometric-features-and-neural-networks-2403.07579"/></url>
<url><loc>https://scifaro.com/en/abs/gender-ambiguous-voice-generation-through-feminine-speaking-style-transfer-in-male-voices-2403.07661</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-ambiguous-voice-generation-through-feminine-speaking-style-transfer-in-male-voices-2403.07661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-ambiguous-voice-generation-through-feminine-speaking-style-transfer-in-male-voices-2403.07661"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-the-labels-unveiling-text-dependency-in-paralinguistic-speech-recognition-datasets-2403.07767</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-the-labels-unveiling-text-dependency-in-paralinguistic-speech-recognition-datasets-2403.07767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-the-labels-unveiling-text-dependency-in-paralinguistic-speech-recognition-datasets-2403.07767"/></url>
<url><loc>https://scifaro.com/en/abs/speech-robust-bench-a-robustness-benchmark-for-speech-recognition-2403.07937</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-robust-bench-a-robustness-benchmark-for-speech-recognition-2403.07937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-robust-bench-a-robustness-benchmark-for-speech-recognition-2403.07937"/></url>
<url><loc>https://scifaro.com/en/abs/the-evaluation-of-a-code-switched-sepedi-english-automatic-speech-recognition-system-2403.07947</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-evaluation-of-a-code-switched-sepedi-english-automatic-speech-recognition-system-2403.07947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-evaluation-of-a-code-switched-sepedi-english-automatic-speech-recognition-system-2403.07947"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-end-to-end-approach-to-noise-invariant-speech-features-via-multi-task-learning-2403.08654</loc><lastmod>2024-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-end-to-end-approach-to-noise-invariant-speech-features-via-multi-task-learning-2403.08654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-end-to-end-approach-to-noise-invariant-speech-features-via-multi-task-learning-2403.08654"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-neural-network-for-volumetric-sound-field-reconstruction-of-speech-signals-2403.09524</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-neural-network-for-volumetric-sound-field-reconstruction-of-speech-signals-2403.09524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-neural-network-for-volumetric-sound-field-reconstruction-of-speech-signals-2403.09524"/></url>
<url><loc>https://scifaro.com/en/abs/wavcraft-audio-editing-and-generation-with-large-language-models-2403.09527</loc><lastmod>2024-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavcraft-audio-editing-and-generation-with-large-language-models-2403.09527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavcraft-audio-editing-and-generation-with-large-language-models-2403.09527"/></url>
<url><loc>https://scifaro.com/en/abs/audiosockets-a-python-socket-package-for-real-time-audio-processing-2403.09789</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiosockets-a-python-socket-package-for-real-time-audio-processing-2403.09789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiosockets-a-python-socket-package-for-real-time-audio-processing-2403.09789"/></url>
<url><loc>https://scifaro.com/en/abs/superm2m-supervised-and-mixture-to-mixture-co-learning-for-speech-enhancement-and-noise-robust-asr-2403.10271</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/superm2m-supervised-and-mixture-to-mixture-co-learning-for-speech-enhancement-and-noise-robust-asr-2403.10271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/superm2m-supervised-and-mixture-to-mixture-co-learning-for-speech-enhancement-and-noise-robust-asr-2403.10271"/></url>
<url><loc>https://scifaro.com/en/abs/hearing-loss-compensation-using-deep-neural-networks-a-framework-and-results-from-a-listening-test-2403.10420</loc><lastmod>2024-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hearing-loss-compensation-using-deep-neural-networks-a-framework-and-results-from-a-listening-test-2403.10420"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hearing-loss-compensation-using-deep-neural-networks-a-framework-and-results-from-a-listening-test-2403.10420"/></url>
<url><loc>https://scifaro.com/en/abs/how-to-train-your-ears-auditory-model-emulation-for-large-dynamic-range-inputs-and-mild-to-severe-hearing-losses-2403.10428</loc><lastmod>2024-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-to-train-your-ears-auditory-model-emulation-for-large-dynamic-range-inputs-and-mild-to-severe-hearing-losses-2403.10428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-to-train-your-ears-auditory-model-emulation-for-large-dynamic-range-inputs-and-mild-to-severe-hearing-losses-2403.10428"/></url>
<url><loc>https://scifaro.com/en/abs/two-sided-acoustic-metascreen-for-broadband-and-individual-reflection-and-transmission-control-2403.10548</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/two-sided-acoustic-metascreen-for-broadband-and-individual-reflection-and-transmission-control-2403.10548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/two-sided-acoustic-metascreen-for-broadband-and-individual-reflection-and-transmission-control-2403.10548"/></url>
<url><loc>https://scifaro.com/en/abs/ptsd-mdnn-fusion-tardive-de-r-eseaux-de-neurones-profonds-multimodaux-pour-la-d-etection-du-trouble-de-stress-post-traumatique-2403.10565</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ptsd-mdnn-fusion-tardive-de-r-eseaux-de-neurones-profonds-multimodaux-pour-la-d-etection-du-trouble-de-stress-post-traumatique-2403.10565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ptsd-mdnn-fusion-tardive-de-r-eseaux-de-neurones-profonds-multimodaux-pour-la-d-etection-du-trouble-de-stress-post-traumatique-2403.10565"/></url>
<url><loc>https://scifaro.com/en/abs/refining-knowledge-transfer-on-audio-image-temporal-agreement-for-audio-text-cross-retrieval-2403.10756</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refining-knowledge-transfer-on-audio-image-temporal-agreement-for-audio-text-cross-retrieval-2403.10756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refining-knowledge-transfer-on-audio-image-temporal-agreement-for-audio-text-cross-retrieval-2403.10756"/></url>
<url><loc>https://scifaro.com/en/abs/initial-decoding-with-minimally-augmented-language-model-for-improved-lattice-rescoring-in-low-resource-asr-2403.10937</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/initial-decoding-with-minimally-augmented-language-model-for-improved-lattice-rescoring-in-low-resource-asr-2403.10937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/initial-decoding-with-minimally-augmented-language-model-for-improved-lattice-rescoring-in-low-resource-asr-2403.10937"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-engine-fault-sound-event-detection-using-multimodal-signals-2403.11037</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-engine-fault-sound-event-detection-using-multimodal-signals-2403.11037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-engine-fault-sound-event-detection-using-multimodal-signals-2403.11037"/></url>
<url><loc>https://scifaro.com/en/abs/discriminative-neighborhood-smoothing-for-generative-anomalous-sound-detection-2403.11508</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discriminative-neighborhood-smoothing-for-generative-anomalous-sound-detection-2403.11508"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discriminative-neighborhood-smoothing-for-generative-anomalous-sound-detection-2403.11508"/></url>
<url><loc>https://scifaro.com/en/abs/adamer-ctc-connectionist-temporal-classification-with-adaptive-maximum-entropy-regularization-for-automatic-speech-recognition-2403.11578</loc><lastmod>2024-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adamer-ctc-connectionist-temporal-classification-with-adaptive-maximum-entropy-regularization-for-automatic-speech-recognition-2403.11578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adamer-ctc-connectionist-temporal-classification-with-adaptive-maximum-entropy-regularization-for-automatic-speech-recognition-2403.11578"/></url>
<url><loc>https://scifaro.com/en/abs/latent-clap-loss-for-better-foley-sound-synthesis-2403.12182</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/latent-clap-loss-for-better-foley-sound-synthesis-2403.12182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/latent-clap-loss-for-better-foley-sound-synthesis-2403.12182"/></url>
<url><loc>https://scifaro.com/en/abs/a-multi-loudspeaker-binaural-room-impulse-response-dataset-with-high-resolution-translational-and-rotational-head-coordinates-in-a-listening-room-2403.12258</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multi-loudspeaker-binaural-room-impulse-response-dataset-with-high-resolution-translational-and-rotational-head-coordinates-in-a-listening-room-2403.12258"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multi-loudspeaker-binaural-room-impulse-response-dataset-with-high-resolution-translational-and-rotational-head-coordinates-in-a-listening-room-2403.12258"/></url>
<url><loc>https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-circular-listening-area-2403.12630</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-circular-listening-area-2403.12630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reproducing-the-acoustic-velocity-vectors-in-a-circular-listening-area-2403.12630"/></url>
<url><loc>https://scifaro.com/en/abs/tdt-kws-fast-and-accurate-keyword-spotting-using-token-and-duration-transducer-2403.13332</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tdt-kws-fast-and-accurate-keyword-spotting-using-token-and-duration-transducer-2403.13332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tdt-kws-fast-and-accurate-keyword-spotting-using-token-and-duration-transducer-2403.13332"/></url>
<url><loc>https://scifaro.com/en/abs/kunqudb-an-attempt-for-speaker-verification-in-the-chinese-opera-scenario-2403.13356</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kunqudb-an-attempt-for-speaker-verification-in-the-chinese-opera-scenario-2403.13356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kunqudb-an-attempt-for-speaker-verification-in-the-chinese-opera-scenario-2403.13356"/></url>
<url><loc>https://scifaro.com/en/abs/banglanum-a-public-dataset-for-bengali-digit-recognition-from-speech-2403.13465</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/banglanum-a-public-dataset-for-bengali-digit-recognition-from-speech-2403.13465"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/banglanum-a-public-dataset-for-bengali-digit-recognition-from-speech-2403.13465"/></url>
<url><loc>https://scifaro.com/en/abs/vibration-sensitivity-of-one-port-and-two-port-mems-microphones-2403.13643</loc><lastmod>2024-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vibration-sensitivity-of-one-port-and-two-port-mems-microphones-2403.13643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vibration-sensitivity-of-one-port-and-two-port-mems-microphones-2403.13643"/></url>
<url><loc>https://scifaro.com/en/abs/adaproj-adaptively-scaled-angular-margin-subspace-projections-for-anomalous-sound-detection-with-auxiliary-classification-tasks-2403.14179</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaproj-adaptively-scaled-angular-margin-subspace-projections-for-anomalous-sound-detection-with-auxiliary-classification-tasks-2403.14179"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaproj-adaptively-scaled-angular-margin-subspace-projections-for-anomalous-sound-detection-with-auxiliary-classification-tasks-2403.14179"/></url>
<url><loc>https://scifaro.com/en/abs/catse-a-context-aware-framework-for-causal-target-sound-extraction-2403.14246</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/catse-a-context-aware-framework-for-causal-target-sound-extraction-2403.14246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/catse-a-context-aware-framework-for-causal-target-sound-extraction-2403.14246"/></url>
<url><loc>https://scifaro.com/en/abs/speech-aware-neural-diarization-with-encoder-decoder-attractor-guided-by-attention-constraints-2403.14268</loc><lastmod>2024-03-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-aware-neural-diarization-with-encoder-decoder-attractor-guided-by-attention-constraints-2403.14268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-aware-neural-diarization-with-encoder-decoder-attractor-guided-by-attention-constraints-2403.14268"/></url>
<url><loc>https://scifaro.com/en/abs/crowdsourced-multilingual-speech-intelligibility-testing-2403.14817</loc><lastmod>2024-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crowdsourced-multilingual-speech-intelligibility-testing-2403.14817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crowdsourced-multilingual-speech-intelligibility-testing-2403.14817"/></url>
<url><loc>https://scifaro.com/en/abs/dialogue-understandability-why-are-we-streaming-movies-with-subtitles-2403.15336</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dialogue-understandability-why-are-we-streaming-movies-with-subtitles-2403.15336"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dialogue-understandability-why-are-we-streaming-movies-with-subtitles-2403.15336"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-intelligence-for-cochlear-implants-review-of-strategies-challenges-and-perspectives-2403.15442</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-intelligence-for-cochlear-implants-review-of-strategies-challenges-and-perspectives-2403.15442"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-intelligence-for-cochlear-implants-review-of-strategies-challenges-and-perspectives-2403.15442"/></url>
<url><loc>https://scifaro.com/en/abs/distributed-collaborative-anomalous-sound-detection-by-embedding-sharing-2403.16610</loc><lastmod>2024-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distributed-collaborative-anomalous-sound-detection-by-embedding-sharing-2403.16610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distributed-collaborative-anomalous-sound-detection-by-embedding-sharing-2403.16610"/></url>
<url><loc>https://scifaro.com/en/abs/voicecraft-zero-shot-speech-editing-and-text-to-speech-in-the-wild-2403.16973</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicecraft-zero-shot-speech-editing-and-text-to-speech-in-the-wild-2403.16973"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicecraft-zero-shot-speech-editing-and-text-to-speech-in-the-wild-2403.16973"/></url>
<url><loc>https://scifaro.com/en/abs/infrastructure-less-localization-from-indoor-environmental-sounds-based-on-spectral-decomposition-and-spatial-likelihood-model-2403.17402</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infrastructure-less-localization-from-indoor-environmental-sounds-based-on-spectral-decomposition-and-spatial-likelihood-model-2403.17402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infrastructure-less-localization-from-indoor-environmental-sounds-based-on-spectral-decomposition-and-spatial-likelihood-model-2403.17402"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-distance-estimation-in-enclosures-from-single-channel-audio-2403.17514</loc><lastmod>2024-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-distance-estimation-in-enclosures-from-single-channel-audio-2403.17514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-distance-estimation-in-enclosures-from-single-channel-audio-2403.17514"/></url>
<url><loc>https://scifaro.com/en/abs/synthetic-training-set-generation-using-text-to-audio-models-for-environmental-sound-classification-2403.17864</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthetic-training-set-generation-using-text-to-audio-models-for-environmental-sound-classification-2403.17864"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthetic-training-set-generation-using-text-to-audio-models-for-environmental-sound-classification-2403.17864"/></url>
<url><loc>https://scifaro.com/en/abs/dual-path-mamba-short-and-long-term-bidirectional-selective-structured-state-space-models-for-speech-separation-2403.18257</loc><lastmod>2024-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-path-mamba-short-and-long-term-bidirectional-selective-structured-state-space-models-for-speech-separation-2403.18257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-path-mamba-short-and-long-term-bidirectional-selective-structured-state-space-models-for-speech-separation-2403.18257"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-keyword-spotting-through-self-supervised-pretraining-2403.18560</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-keyword-spotting-through-self-supervised-pretraining-2403.18560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-keyword-spotting-through-self-supervised-pretraining-2403.18560"/></url>
<url><loc>https://scifaro.com/en/abs/a-diffusion-based-generative-equalizer-for-music-restoration-2403.18636</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-diffusion-based-generative-equalizer-for-music-restoration-2403.18636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-diffusion-based-generative-equalizer-for-music-restoration-2403.18636"/></url>
<url><loc>https://scifaro.com/en/abs/mind-the-domain-gap-a-systematic-analysis-on-bioacoustic-sound-event-detection-2403.18638</loc><lastmod>2024-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mind-the-domain-gap-a-systematic-analysis-on-bioacoustic-sound-event-detection-2403.18638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mind-the-domain-gap-a-systematic-analysis-on-bioacoustic-sound-event-detection-2403.18638"/></url>
<url><loc>https://scifaro.com/en/abs/lv-ctc-non-autoregressive-asr-with-ctc-and-latent-variable-models-2403.19207</loc><lastmod>2024-03-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lv-ctc-non-autoregressive-asr-with-ctc-and-latent-variable-models-2403.19207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lv-ctc-non-autoregressive-asr-with-ctc-and-latent-variable-models-2403.19207"/></url>
<url><loc>https://scifaro.com/en/abs/blind-identification-of-binaural-room-impulse-responses-from-smart-glasses-2403.19217</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-identification-of-binaural-room-impulse-responses-from-smart-glasses-2403.19217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-identification-of-binaural-room-impulse-responses-from-smart-glasses-2403.19217"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-recurrent-adapters-for-efficient-multi-task-adaptation-of-large-speech-models-2403.19709</loc><lastmod>2024-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-recurrent-adapters-for-efficient-multi-task-adaptation-of-large-speech-models-2403.19709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-recurrent-adapters-for-efficient-multi-task-adaptation-of-large-speech-models-2403.19709"/></url>
<url><loc>https://scifaro.com/en/abs/3d-speaker-toolkit-an-open-source-toolkit-for-multimodal-speaker-verification-and-diarization-2403.19971</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/3d-speaker-toolkit-an-open-source-toolkit-for-multimodal-speaker-verification-and-diarization-2403.19971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/3d-speaker-toolkit-an-open-source-toolkit-for-multimodal-speaker-verification-and-diarization-2403.19971"/></url>
<url><loc>https://scifaro.com/en/abs/non-exponential-reverberation-modeling-using-dark-velvet-noise-2403.20090</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-exponential-reverberation-modeling-using-dark-velvet-noise-2403.20090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-exponential-reverberation-modeling-using-dark-velvet-noise-2403.20090"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-pathological-speech-quality-assessment-with-asr-powered-wav2vec2-in-data-scarce-context-2403.20184</loc><lastmod>2024-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-pathological-speech-quality-assessment-with-asr-powered-wav2vec2-in-data-scarce-context-2403.20184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-pathological-speech-quality-assessment-with-asr-powered-wav2vec2-in-data-scarce-context-2403.20184"/></url>
<url><loc>https://scifaro.com/en/abs/data-driven-room-acoustic-modeling-via-differentiable-feedback-delay-networks-with-learnable-delay-lines-2404.00082</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-driven-room-acoustic-modeling-via-differentiable-feedback-delay-networks-with-learnable-delay-lines-2404.00082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-driven-room-acoustic-modeling-via-differentiable-feedback-delay-networks-with-learnable-delay-lines-2404.00082"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-properties-of-speech-language-models-2404.00685</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-properties-of-speech-language-models-2404.00685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-properties-of-speech-language-models-2404.00685"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneity-over-homogeneity-investigating-multilingual-speech-pre-trained-models-for-detecting-audio-deepfake-2404.00809</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneity-over-homogeneity-investigating-multilingual-speech-pre-trained-models-for-detecting-audio-deepfake-2404.00809"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneity-over-homogeneity-investigating-multilingual-speech-pre-trained-models-for-detecting-audio-deepfake-2404.00809"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-real-world-active-speaker-detection-with-multi-modal-extraction-pre-training-2404.00861</loc><lastmod>2024-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-real-world-active-speaker-detection-with-multi-modal-extraction-pre-training-2404.00861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-real-world-active-speaker-detection-with-multi-modal-extraction-pre-training-2404.00861"/></url>
<url><loc>https://scifaro.com/en/abs/voice-conversion-augmentation-for-speaker-recognition-on-defective-datasets-2404.00863</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-conversion-augmentation-for-speaker-recognition-on-defective-datasets-2404.00863"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-conversion-augmentation-for-speaker-recognition-on-defective-datasets-2404.00863"/></url>
<url><loc>https://scifaro.com/en/abs/kazemotts-a-dataset-for-kazakh-emotional-text-to-speech-synthesis-2404.01033</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kazemotts-a-dataset-for-kazakh-emotional-text-to-speech-synthesis-2404.01033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kazemotts-a-dataset-for-kazakh-emotional-text-to-speech-synthesis-2404.01033"/></url>
<url><loc>https://scifaro.com/en/abs/effective-internal-language-model-training-and-fusion-for-factorized-transducer-model-2404.01716</loc><lastmod>2024-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-internal-language-model-training-and-fusion-for-factorized-transducer-model-2404.01716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-internal-language-model-training-and-fusion-for-factorized-transducer-model-2404.01716"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-from-whisper-for-microscopic-intelligibility-prediction-2404.01737</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-from-whisper-for-microscopic-intelligibility-prediction-2404.01737"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-from-whisper-for-microscopic-intelligibility-prediction-2404.01737"/></url>
<url><loc>https://scifaro.com/en/abs/the-voiceprivacy-2024-challenge-evaluation-plan-2404.02677</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voiceprivacy-2024-challenge-evaluation-plan-2404.02677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voiceprivacy-2024-challenge-evaluation-plan-2404.02677"/></url>
<url><loc>https://scifaro.com/en/abs/clam-tts-improving-neural-codec-language-model-for-zero-shot-text-to-speech-2404.02781</loc><lastmod>2024-04-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clam-tts-improving-neural-codec-language-model-for-zero-shot-text-to-speech-2404.02781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clam-tts-improving-neural-codec-language-model-for-zero-shot-text-to-speech-2404.02781"/></url>
<url><loc>https://scifaro.com/en/abs/rall-e-robust-codec-language-modeling-with-chain-of-thought-prompting-for-text-to-speech-synthesis-2404.03204</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rall-e-robust-codec-language-modeling-with-chain-of-thought-prompting-for-text-to-speech-synthesis-2404.03204"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rall-e-robust-codec-language-modeling-with-chain-of-thought-prompting-for-text-to-speech-synthesis-2404.03204"/></url>
<url><loc>https://scifaro.com/en/abs/interpreting-end-to-end-deep-learning-models-for-speech-source-localization-using-layer-wise-relevance-propagation-2404.03436</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpreting-end-to-end-deep-learning-models-for-speech-source-localization-using-layer-wise-relevance-propagation-2404.03436"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpreting-end-to-end-deep-learning-models-for-speech-source-localization-using-layer-wise-relevance-propagation-2404.03436"/></url>
<url><loc>https://scifaro.com/en/abs/a-self-attention-driven-deep-denoiser-model-for-real-time-lung-sound-denoising-in-noisy-environments-2404.04365</loc><lastmod>2025-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-self-attention-driven-deep-denoiser-model-for-real-time-lung-sound-denoising-in-noisy-environments-2404.04365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-self-attention-driven-deep-denoiser-model-for-real-time-lung-sound-denoising-in-noisy-environments-2404.04365"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-non-negative-matrix-factorization-with-implicit-neural-representations-2404.04439</loc><lastmod>2025-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-non-negative-matrix-factorization-with-implicit-neural-representations-2404.04439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-non-negative-matrix-factorization-with-implicit-neural-representations-2404.04439"/></url>
<url><loc>https://scifaro.com/en/abs/gull-a-generative-multifunctional-audio-codec-2404.04947</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gull-a-generative-multifunctional-audio-codec-2404.04947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gull-a-generative-multifunctional-audio-codec-2404.04947"/></url>
<url><loc>https://scifaro.com/en/abs/the-x-lance-technical-report-for-interspeech-2024-speech-processing-using-discrete-speech-unit-challenge-2404.06079</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-x-lance-technical-report-for-interspeech-2024-speech-processing-using-discrete-speech-unit-challenge-2404.06079"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-x-lance-technical-report-for-interspeech-2024-speech-processing-using-discrete-speech-unit-challenge-2404.06079"/></url>
<url><loc>https://scifaro.com/en/abs/masked-modeling-duo-towards-a-universal-audio-pre-training-framework-2404.06095</loc><lastmod>2024-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masked-modeling-duo-towards-a-universal-audio-pre-training-framework-2404.06095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masked-modeling-duo-towards-a-universal-audio-pre-training-framework-2404.06095"/></url>
<url><loc>https://scifaro.com/en/abs/covomix-advancing-zero-shot-speech-generation-for-human-like-multi-talker-conversations-2404.06690</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/covomix-advancing-zero-shot-speech-generation-for-human-like-multi-talker-conversations-2404.06690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/covomix-advancing-zero-shot-speech-generation-for-human-like-multi-talker-conversations-2404.06690"/></url>
<url><loc>https://scifaro.com/en/abs/what-is-learnt-by-the-learnable-front-end-leaf-adapting-per-channel-energy-normalisation-pcen-to-noisy-conditions-2404.06702</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-is-learnt-by-the-learnable-front-end-leaf-adapting-per-channel-energy-normalisation-pcen-to-noisy-conditions-2404.06702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-is-learnt-by-the-learnable-front-end-leaf-adapting-per-channel-energy-normalisation-pcen-to-noisy-conditions-2404.06702"/></url>
<url><loc>https://scifaro.com/en/abs/towards-efficient-and-real-time-piano-transcription-using-neural-autoregressive-models-2404.06818</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-efficient-and-real-time-piano-transcription-using-neural-autoregressive-models-2404.06818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-efficient-and-real-time-piano-transcription-using-neural-autoregressive-models-2404.06818"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-sound-field-reconstruction-with-conditional-invertible-neural-networks-2404.06928</loc><lastmod>2024-04-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-sound-field-reconstruction-with-conditional-invertible-neural-networks-2404.06928"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-sound-field-reconstruction-with-conditional-invertible-neural-networks-2404.06928"/></url>
<url><loc>https://scifaro.com/en/abs/houston-we-have-a-divergence-a-subgroup-performance-analysis-of-asr-models-2404.07226</loc><lastmod>2025-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/houston-we-have-a-divergence-a-subgroup-performance-analysis-of-asr-models-2404.07226"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/houston-we-have-a-divergence-a-subgroup-performance-analysis-of-asr-models-2404.07226"/></url>
<url><loc>https://scifaro.com/en/abs/conformer-1-robust-asr-via-large-scale-semisupervised-bootstrapping-2404.07341</loc><lastmod>2024-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformer-1-robust-asr-via-large-scale-semisupervised-bootstrapping-2404.07341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformer-1-robust-asr-via-large-scale-semisupervised-bootstrapping-2404.07341"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-all-pole-filters-for-time-varying-audio-systems-2404.07970</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-all-pole-filters-for-time-varying-audio-systems-2404.07970"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-all-pole-filters-for-time-varying-audio-systems-2404.07970"/></url>
<url><loc>https://scifaro.com/en/abs/the-impact-of-speech-anonymization-on-pathology-and-its-limits-2404.08064</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-impact-of-speech-anonymization-on-pathology-and-its-limits-2404.08064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-impact-of-speech-anonymization-on-pathology-and-its-limits-2404.08064"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-song-towards-controllable-music-generation-incorporating-vocals-and-accompaniment-2404.09313</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-song-towards-controllable-music-generation-incorporating-vocals-and-accompaniment-2404.09313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-song-towards-controllable-music-generation-incorporating-vocals-and-accompaniment-2404.09313"/></url>
<url><loc>https://scifaro.com/en/abs/a-large-scale-evaluation-of-speech-foundation-models-2404.09385</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-large-scale-evaluation-of-speech-foundation-models-2404.09385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-large-scale-evaluation-of-speech-foundation-models-2404.09385"/></url>
<url><loc>https://scifaro.com/en/abs/anatomy-of-industrial-scale-multilingual-asr-2404.09841</loc><lastmod>2024-04-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anatomy-of-industrial-scale-multilingual-asr-2404.09841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anatomy-of-industrial-scale-multilingual-asr-2404.09841"/></url>
<url><loc>https://scifaro.com/en/abs/wireless-earphone-based-real-time-monitoring-of-breathing-exercises-a-deep-learning-approach-2404.10310</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wireless-earphone-based-real-time-monitoring-of-breathing-exercises-a-deep-learning-approach-2404.10310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wireless-earphone-based-real-time-monitoring-of-breathing-exercises-a-deep-learning-approach-2404.10310"/></url>
<url><loc>https://scifaro.com/en/abs/mad-speech-measures-of-acoustic-diversity-of-speech-2404.10419</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mad-speech-measures-of-acoustic-diversity-of-speech-2404.10419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mad-speech-measures-of-acoustic-diversity-of-speech-2404.10419"/></url>
<url><loc>https://scifaro.com/en/abs/in-situ-sound-absorption-estimation-with-the-discrete-complex-image-source-method-2404.11399</loc><lastmod>2024-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-situ-sound-absorption-estimation-with-the-discrete-complex-image-source-method-2404.11399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-situ-sound-absorption-estimation-with-the-discrete-complex-image-source-method-2404.11399"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-speech-translation-a-corpus-of-mandarin-english-conversational-telephone-speech-2404.11619</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-speech-translation-a-corpus-of-mandarin-english-conversational-telephone-speech-2404.11619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-speech-translation-a-corpus-of-mandarin-english-conversational-telephone-speech-2404.11619"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-high-performance-bark-scale-neural-network-for-residual-echo-and-noise-suppression-2404.11621</loc><lastmod>2024-04-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-high-performance-bark-scale-neural-network-for-residual-echo-and-noise-suppression-2404.11621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-high-performance-bark-scale-neural-network-for-residual-echo-and-noise-suppression-2404.11621"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-potential-of-data-driven-spatial-audio-enhancement-using-a-single-channel-model-2404.14564</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-potential-of-data-driven-spatial-audio-enhancement-using-a-single-channel-model-2404.14564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-potential-of-data-driven-spatial-audio-enhancement-using-a-single-channel-model-2404.14564"/></url>
<url><loc>https://scifaro.com/en/abs/flashspeech-efficient-zero-shot-speech-synthesis-2404.14700</loc><lastmod>2024-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flashspeech-efficient-zero-shot-speech-synthesis-2404.14700"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flashspeech-efficient-zero-shot-speech-synthesis-2404.14700"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-processing-distortions-disentangling-the-impact-of-speech-enhancement-errors-on-speech-recognition-performance-2404.14860</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-processing-distortions-disentangling-the-impact-of-speech-enhancement-errors-on-speech-recognition-performance-2404.14860"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-processing-distortions-disentangling-the-impact-of-speech-enhancement-errors-on-speech-recognition-performance-2404.14860"/></url>
<url><loc>https://scifaro.com/en/abs/multi-sample-dynamic-time-warping-for-few-shot-keyword-spotting-2404.14903</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-sample-dynamic-time-warping-for-few-shot-keyword-spotting-2404.14903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-sample-dynamic-time-warping-for-few-shot-keyword-spotting-2404.14903"/></url>
<url><loc>https://scifaro.com/en/abs/additive-margin-in-contrastive-self-supervised-frameworks-to-learn-discriminative-speaker-representations-2404.14913</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/additive-margin-in-contrastive-self-supervised-frameworks-to-learn-discriminative-speaker-representations-2404.14913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/additive-margin-in-contrastive-self-supervised-frameworks-to-learn-discriminative-speaker-representations-2404.14913"/></url>
<url><loc>https://scifaro.com/en/abs/artificial-neural-networks-to-recognize-speakers-division-from-continuous-bengali-speech-2404.15168</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/artificial-neural-networks-to-recognize-speakers-division-from-continuous-bengali-speech-2404.15168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/artificial-neural-networks-to-recognize-speakers-division-from-continuous-bengali-speech-2404.15168"/></url>
<url><loc>https://scifaro.com/en/abs/voice-passing-a-non-binary-voice-gender-prediction-system-for-evaluating-transgender-voice-transition-2404.15176</loc><lastmod>2024-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-passing-a-non-binary-voice-gender-prediction-system-for-evaluating-transgender-voice-transition-2404.15176"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-passing-a-non-binary-voice-gender-prediction-system-for-evaluating-transgender-voice-transition-2404.15176"/></url>
<url><loc>https://scifaro.com/en/abs/evolution-of-voices-in-french-audiovisual-media-across-genders-and-age-in-a-diachronic-perspective-2404.16104</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evolution-of-voices-in-french-audiovisual-media-across-genders-and-age-in-a-diachronic-perspective-2404.16104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evolution-of-voices-in-french-audiovisual-media-across-genders-and-age-in-a-diachronic-perspective-2404.16104"/></url>
<url><loc>https://scifaro.com/en/abs/developing-acoustic-models-for-automatic-speech-recognition-in-swedish-2404.16547</loc><lastmod>2024-04-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-acoustic-models-for-automatic-speech-recognition-in-swedish-2404.16547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-acoustic-models-for-automatic-speech-recognition-in-swedish-2404.16547"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-pre-trained-general-purpose-audio-representations-for-heart-murmur-detection-2404.17107</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-pre-trained-general-purpose-audio-representations-for-heart-murmur-detection-2404.17107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-pre-trained-general-purpose-audio-representations-for-heart-murmur-detection-2404.17107"/></url>
<url><loc>https://scifaro.com/en/abs/the-carfac-v2-cochlear-model-in-matlab-numpy-and-jax-2404.17490</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-carfac-v2-cochlear-model-in-matlab-numpy-and-jax-2404.17490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-carfac-v2-cochlear-model-in-matlab-numpy-and-jax-2404.17490"/></url>
<url><loc>https://scifaro.com/en/abs/a-semi-automatic-approach-to-create-large-gender-and-age-balanced-speaker-corpora-usefulness-of-speaker-diarization-identification-2404.17552</loc><lastmod>2024-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-semi-automatic-approach-to-create-large-gender-and-age-balanced-speaker-corpora-usefulness-of-speaker-diarization-identification-2404.17552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-semi-automatic-approach-to-create-large-gender-and-age-balanced-speaker-corpora-usefulness-of-speaker-diarization-identification-2404.17552"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparison-of-differential-performance-metrics-for-the-evaluation-of-automatic-speaker-verification-fairness-2404.17810</loc><lastmod>2024-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparison-of-differential-performance-metrics-for-the-evaluation-of-automatic-speaker-verification-fairness-2404.17810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparison-of-differential-performance-metrics-for-the-evaluation-of-automatic-speaker-verification-fairness-2404.17810"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-target-speaker-extraction-with-reverse-selective-auditory-attention-2404.18501</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-target-speaker-extraction-with-reverse-selective-auditory-attention-2404.18501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-target-speaker-extraction-with-reverse-selective-auditory-attention-2404.18501"/></url>
<url><loc>https://scifaro.com/en/abs/deep-low-latency-joint-speech-transmission-and-enhancement-over-a-gaussian-channel-2404.19375</loc><lastmod>2024-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-low-latency-joint-speech-transmission-and-enhancement-over-a-gaussian-channel-2404.19375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-low-latency-joint-speech-transmission-and-enhancement-over-a-gaussian-channel-2404.19375"/></url>
<url><loc>https://scifaro.com/en/abs/attention-constrained-inference-for-robust-decoder-only-text-to-speech-2404.19723</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attention-constrained-inference-for-robust-decoder-only-text-to-speech-2404.19723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attention-constrained-inference-for-robust-decoder-only-text-to-speech-2404.19723"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-representations-for-speech-music-and-acoustic-events-2405.00934</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-representations-for-speech-music-and-acoustic-events-2405.00934"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-representations-for-speech-music-and-acoustic-events-2405.00934"/></url>
<url><loc>https://scifaro.com/en/abs/converting-anyone-s-voice-end-to-end-expressive-voice-conversion-with-a-conditional-diffusion-model-2405.01730</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/converting-anyone-s-voice-end-to-end-expressive-voice-conversion-with-a-conditional-diffusion-model-2405.01730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/converting-anyone-s-voice-end-to-end-expressive-voice-conversion-with-a-conditional-diffusion-model-2405.01730"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-multichannel-deep-speech-enhancement-in-hearing-aids-comparing-monaural-and-binaural-processing-in-complex-acoustic-scenarios-2405.01967</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-multichannel-deep-speech-enhancement-in-hearing-aids-comparing-monaural-and-binaural-processing-in-complex-acoustic-scenarios-2405.01967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-multichannel-deep-speech-enhancement-in-hearing-aids-comparing-monaural-and-binaural-processing-in-complex-acoustic-scenarios-2405.01967"/></url>
<url><loc>https://scifaro.com/en/abs/tipaa-ssl-text-independent-phone-to-audio-alignment-based-on-self-supervised-learning-and-knowledge-transfer-2405.02124</loc><lastmod>2024-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tipaa-ssl-text-independent-phone-to-audio-alignment-based-on-self-supervised-learning-and-knowledge-transfer-2405.02124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tipaa-ssl-text-independent-phone-to-audio-alignment-based-on-self-supervised-learning-and-knowledge-transfer-2405.02124"/></url>
<url><loc>https://scifaro.com/en/abs/mmger-multi-modal-and-multi-granularity-generative-error-correction-with-llm-for-joint-accent-and-speech-recognition-2405.03152</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmger-multi-modal-and-multi-granularity-generative-error-correction-with-llm-for-joint-accent-and-speech-recognition-2405.03152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmger-multi-modal-and-multi-granularity-generative-error-correction-with-llm-for-joint-accent-and-speech-recognition-2405.03152"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-assessment-of-dysarthria-using-audio-visual-vowel-graph-attention-network-2405.03254</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-assessment-of-dysarthria-using-audio-visual-vowel-graph-attention-network-2405.03254"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-assessment-of-dysarthria-using-audio-visual-vowel-graph-attention-network-2405.03254"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-characterization-by-means-of-attention-pooling-2405.04096</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-characterization-by-means-of-attention-pooling-2405.04096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-characterization-by-means-of-attention-pooling-2405.04096"/></url>
<url><loc>https://scifaro.com/en/abs/buddy-single-channel-blind-unsupervised-dereverberation-with-diffusion-models-2405.04272</loc><lastmod>2024-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/buddy-single-channel-blind-unsupervised-dereverberation-with-diffusion-models-2405.04272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/buddy-single-channel-blind-unsupervised-dereverberation-with-diffusion-models-2405.04272"/></url>
<url><loc>https://scifaro.com/en/abs/berp-a-blind-estimator-of-room-parameters-for-single-channel-noisy-speech-signals-2405.04476</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/berp-a-blind-estimator-of-room-parameters-for-single-channel-noisy-speech-signals-2405.04476"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/berp-a-blind-estimator-of-room-parameters-for-single-channel-noisy-speech-signals-2405.04476"/></url>
<url><loc>https://scifaro.com/en/abs/singit-singer-voice-transformation-2405.04627</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singit-singer-voice-transformation-2405.04627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singit-singer-voice-transformation-2405.04627"/></url>
<url><loc>https://scifaro.com/en/abs/hilcodec-high-fidelity-and-lightweight-neural-audio-codec-2405.04752</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hilcodec-high-fidelity-and-lightweight-neural-audio-codec-2405.04752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hilcodec-high-fidelity-and-lightweight-neural-audio-codec-2405.04752"/></url>
<url><loc>https://scifaro.com/en/abs/svdd-challenge-2024-a-singing-voice-deepfake-detection-challenge-evaluation-plan-2405.05244</loc><lastmod>2024-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svdd-challenge-2024-a-singing-voice-deepfake-detection-challenge-evaluation-plan-2405.05244"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svdd-challenge-2024-a-singing-voice-deepfake-detection-challenge-evaluation-plan-2405.05244"/></url>
<url><loc>https://scifaro.com/en/abs/ipdnet-a-universal-direct-path-ipd-estimation-network-for-sound-source-localization-2405.07021</loc><lastmod>2024-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ipdnet-a-universal-direct-path-ipd-estimation-network-for-sound-source-localization-2405.07021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ipdnet-a-universal-direct-path-ipd-estimation-network-for-sound-source-localization-2405.07021"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-speech-enhancement-systems-through-listening-effort-2405.07641</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-speech-enhancement-systems-through-listening-effort-2405.07641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-speech-enhancement-systems-through-listening-effort-2405.07641"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-mimo-systems-for-speech-to-text-transmission-2405.08096</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-mimo-systems-for-speech-to-text-transmission-2405.08096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-mimo-systems-for-speech-to-text-transmission-2405.08096"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-coding-for-real-time-communications-using-constant-bitrate-scalar-quantization-2405.08417</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-coding-for-real-time-communications-using-constant-bitrate-scalar-quantization-2405.08417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-coding-for-real-time-communications-using-constant-bitrate-scalar-quantization-2405.08417"/></url>
<url><loc>https://scifaro.com/en/abs/a-tunable-binaural-audio-telepresence-system-capable-of-balancing-immersive-and-enhanced-modes-2405.08742</loc><lastmod>2024-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-tunable-binaural-audio-telepresence-system-capable-of-balancing-immersive-and-enhanced-modes-2405.08742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-tunable-binaural-audio-telepresence-system-capable-of-balancing-immersive-and-enhanced-modes-2405.08742"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-embeddings-with-weakly-supervised-voice-activity-detection-for-efficient-speaker-diarization-2405.09142</loc><lastmod>2024-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-embeddings-with-weakly-supervised-voice-activity-detection-for-efficient-speaker-diarization-2405.09142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-embeddings-with-weakly-supervised-voice-activity-detection-for-efficient-speaker-diarization-2405.09142"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-text-to-speech-synthesis-from-a-large-discrete-token-based-speech-language-model-2405.09768</loc><lastmod>2024-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-text-to-speech-synthesis-from-a-large-discrete-token-based-speech-language-model-2405.09768"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-text-to-speech-synthesis-from-a-large-discrete-token-based-speech-language-model-2405.09768"/></url>
<url><loc>https://scifaro.com/en/abs/robust-singing-voice-transcription-serves-synthesis-2405.09940</loc><lastmod>2024-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-singing-voice-transcription-serves-synthesis-2405.09940"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-singing-voice-transcription-serves-synthesis-2405.09940"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-in-the-dcase-2024-challenge-2405.10018</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-in-the-dcase-2024-challenge-2405.10018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-low-complexity-acoustic-scene-classification-in-the-dcase-2024-challenge-2405.10018"/></url>
<url><loc>https://scifaro.com/en/abs/monaural-speech-enhancement-on-drone-via-adapter-based-transfer-learning-2405.10022</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/monaural-speech-enhancement-on-drone-via-adapter-based-transfer-learning-2405.10022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/monaural-speech-enhancement-on-drone-via-adapter-based-transfer-learning-2405.10022"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-deep-audio-text-retrieval-through-the-lens-of-transportation-2405.10084</loc><lastmod>2024-05-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-deep-audio-text-retrieval-through-the-lens-of-transportation-2405.10084"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-deep-audio-text-retrieval-through-the-lens-of-transportation-2405.10084"/></url>
<url><loc>https://scifaro.com/en/abs/distinctive-and-natural-speaker-anonymization-via-singular-value-transformation-assisted-matrix-2405.10786</loc><lastmod>2024-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distinctive-and-natural-speaker-anonymization-via-singular-value-transformation-assisted-matrix-2405.10786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distinctive-and-natural-speaker-anonymization-via-singular-value-transformation-assisted-matrix-2405.10786"/></url>
<url><loc>https://scifaro.com/en/abs/acoustic-modeling-for-overlapping-speech-recognition-jhu-chime-5-challenge-system-2405.11078</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/acoustic-modeling-for-overlapping-speech-recognition-jhu-chime-5-challenge-system-2405.11078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/acoustic-modeling-for-overlapping-speech-recognition-jhu-chime-5-challenge-system-2405.11078"/></url>
<url><loc>https://scifaro.com/en/abs/audiosetmix-enhancing-audio-language-datasets-with-llm-assisted-augmentations-2405.11093</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiosetmix-enhancing-audio-language-datasets-with-llm-assisted-augmentations-2405.11093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiosetmix-enhancing-audio-language-datasets-with-llm-assisted-augmentations-2405.11093"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speech-style-spaces-with-language-models-emotional-tts-without-emotion-labels-2405.11413</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speech-style-spaces-with-language-models-emotional-tts-without-emotion-labels-2405.11413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speech-style-spaces-with-language-models-emotional-tts-without-emotion-labels-2405.11413"/></url>
<url><loc>https://scifaro.com/en/abs/speech-dependent-data-augmentation-for-own-voice-reconstruction-with-hearable-microphones-in-noisy-environments-2405.11592</loc><lastmod>2025-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-dependent-data-augmentation-for-own-voice-reconstruction-with-hearable-microphones-in-noisy-environments-2405.11592"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-dependent-data-augmentation-for-own-voice-reconstruction-with-hearable-microphones-in-noisy-environments-2405.11592"/></url>
<url><loc>https://scifaro.com/en/abs/multi-speaker-text-to-speech-training-with-speaker-anonymized-data-2405.11767</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-speaker-text-to-speech-training-with-speaker-anonymized-data-2405.11767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-speaker-text-to-speech-training-with-speaker-anonymized-data-2405.11767"/></url>
<url><loc>https://scifaro.com/en/abs/source-localization-by-multidimensional-steered-response-power-mapping-with-sparse-bayesian-learning-2405.11792</loc><lastmod>2024-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-localization-by-multidimensional-steered-response-power-mapping-with-sparse-bayesian-learning-2405.11792"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-localization-by-multidimensional-steered-response-power-mapping-with-sparse-bayesian-learning-2405.11792"/></url>
<url><loc>https://scifaro.com/en/abs/ssamba-self-supervised-audio-representation-learning-with-mamba-state-space-model-2405.11831</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssamba-self-supervised-audio-representation-learning-with-mamba-state-space-model-2405.11831"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssamba-self-supervised-audio-representation-learning-with-mamba-state-space-model-2405.11831"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-integrating-wireless-technology-into-active-noise-control-2405.12496</loc><lastmod>2024-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-integrating-wireless-technology-into-active-noise-control-2405.12496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-integrating-wireless-technology-into-active-noise-control-2405.12496"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-in-speech-towards-an-alternative-to-self-attention-2405.12609</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-in-speech-towards-an-alternative-to-self-attention-2405.12609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-in-speech-towards-an-alternative-to-self-attention-2405.12609"/></url>
<url><loc>https://scifaro.com/en/abs/multilingual-audio-visual-speech-recognition-with-hybrid-ctc-rnn-t-fast-conformer-2405.12983</loc><lastmod>2024-05-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multilingual-audio-visual-speech-recognition-with-hybrid-ctc-rnn-t-fast-conformer-2405.12983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multilingual-audio-visual-speech-recognition-with-hybrid-ctc-rnn-t-fast-conformer-2405.12983"/></url>
<url><loc>https://scifaro.com/en/abs/fairlens-assessing-fairness-in-law-enforcement-speech-recognition-2405.13166</loc><lastmod>2024-05-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fairlens-assessing-fairness-in-law-enforcement-speech-recognition-2405.13166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fairlens-assessing-fairness-in-law-enforcement-speech-recognition-2405.13166"/></url>
<url><loc>https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-dynamic-vocabulary-2405.13344</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-dynamic-vocabulary-2405.13344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextualized-automatic-speech-recognition-with-dynamic-vocabulary-2405.13344"/></url>
<url><loc>https://scifaro.com/en/abs/joint-optimization-of-streaming-and-non-streaming-automatic-speech-recognition-with-multi-decoder-and-knowledge-distillation-2405.13514</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-optimization-of-streaming-and-non-streaming-automatic-speech-recognition-with-multi-decoder-and-knowledge-distillation-2405.13514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-optimization-of-streaming-and-non-streaming-automatic-speech-recognition-with-multi-decoder-and-knowledge-distillation-2405.13514"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-and-accurate-zero-shot-high-fidelity-singing-voice-conversion-with-multi-condition-flow-synthesis-2405.15093</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-and-accurate-zero-shot-high-fidelity-singing-voice-conversion-with-multi-condition-flow-synthesis-2405.15093"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-and-accurate-zero-shot-high-fidelity-singing-voice-conversion-with-multi-condition-flow-synthesis-2405.15093"/></url>
<url><loc>https://scifaro.com/en/abs/crossmodal-asr-error-correction-with-discrete-speech-units-2405.16677</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossmodal-asr-error-correction-with-discrete-speech-units-2405.16677"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossmodal-asr-error-correction-with-discrete-speech-units-2405.16677"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-deep-learning-architecture-for-efficient-edge-processing-2405.16834</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-deep-learning-architecture-for-efficient-edge-processing-2405.16834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-deep-learning-architecture-for-efficient-edge-processing-2405.16834"/></url>
<url><loc>https://scifaro.com/en/abs/a-variance-preserving-interpolation-approach-for-diffusion-models-with-applications-to-single-channel-speech-enhancement-and-recognition-2405.16952</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-variance-preserving-interpolation-approach-for-diffusion-models-with-applications-to-single-channel-speech-enhancement-and-recognition-2405.16952"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-variance-preserving-interpolation-approach-for-diffusion-models-with-applications-to-single-channel-speech-enhancement-and-recognition-2405.16952"/></url>
<url><loc>https://scifaro.com/en/abs/speech-loudness-in-broadcasting-and-streaming-2405.17364</loc><lastmod>2024-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-loudness-in-broadcasting-and-streaming-2405.17364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-loudness-in-broadcasting-and-streaming-2405.17364"/></url>
<url><loc>https://scifaro.com/en/abs/gaussian-flow-bridges-for-audio-domain-transfer-with-unpaired-data-2405.19497</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaussian-flow-bridges-for-audio-domain-transfer-with-unpaired-data-2405.19497"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaussian-flow-bridges-for-audio-domain-transfer-with-unpaired-data-2405.19497"/></url>
<url><loc>https://scifaro.com/en/abs/1st-place-solution-to-odyssey-emotion-recognition-challenge-task1-tackling-class-imbalance-problem-2405.20064</loc><lastmod>2024-05-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1st-place-solution-to-odyssey-emotion-recognition-challenge-task1-tackling-class-imbalance-problem-2405.20064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1st-place-solution-to-odyssey-emotion-recognition-challenge-task1-tackling-class-imbalance-problem-2405.20064"/></url>
<url><loc>https://scifaro.com/en/abs/cross-talk-reduction-2405.20402</loc><lastmod>2024-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-talk-reduction-2405.20402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-talk-reduction-2405.20402"/></url>
<url><loc>https://scifaro.com/en/abs/very-low-complexity-speech-synthesis-using-framewise-autoregressive-gan-fargan-with-pitch-prediction-2405.21069</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/very-low-complexity-speech-synthesis-using-framewise-autoregressive-gan-fargan-with-pitch-prediction-2405.21069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/very-low-complexity-speech-synthesis-using-framewise-autoregressive-gan-fargan-with-pitch-prediction-2405.21069"/></url>
<url><loc>https://scifaro.com/en/abs/audiolcm-text-to-audio-generation-with-latent-consistency-models-2406.00356</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiolcm-text-to-audio-generation-with-latent-consistency-models-2406.00356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiolcm-text-to-audio-generation-with-latent-consistency-models-2406.00356"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-talker-localization-in-video-for-spatial-sound-reproduction-2406.00495</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-talker-localization-in-video-for-spatial-sound-reproduction-2406.00495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-talker-localization-in-video-for-spatial-sound-reproduction-2406.00495"/></url>
<url><loc>https://scifaro.com/en/abs/wav2prompt-end-to-end-speech-prompt-generation-and-tuning-for-llm-in-zero-and-few-shot-learning-2406.00522</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2prompt-end-to-end-speech-prompt-generation-and-tuning-for-llm-in-zero-and-few-shot-learning-2406.00522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2prompt-end-to-end-speech-prompt-generation-and-tuning-for-llm-in-zero-and-few-shot-learning-2406.00522"/></url>
<url><loc>https://scifaro.com/en/abs/accent-conversion-in-text-to-speech-using-multi-level-vae-and-adversarial-training-2406.01018</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-conversion-in-text-to-speech-using-multi-level-vae-and-adversarial-training-2406.01018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-conversion-in-text-to-speech-using-multi-level-vae-and-adversarial-training-2406.01018"/></url>
<url><loc>https://scifaro.com/en/abs/controlspeech-towards-simultaneous-and-independent-zero-shot-speaker-cloning-and-zero-shot-language-style-control-2406.01205</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/controlspeech-towards-simultaneous-and-independent-zero-shot-speaker-cloning-and-zero-shot-language-style-control-2406.01205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/controlspeech-towards-simultaneous-and-independent-zero-shot-speaker-cloning-and-zero-shot-language-style-control-2406.01205"/></url>
<url><loc>https://scifaro.com/en/abs/unveiling-hidden-factors-explainable-ai-for-feature-boosting-in-speech-emotion-recognition-2406.01624</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unveiling-hidden-factors-explainable-ai-for-feature-boosting-in-speech-emotion-recognition-2406.01624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unveiling-hidden-factors-explainable-ai-for-feature-boosting-in-speech-emotion-recognition-2406.01624"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-enhanced-language-modeling-for-text-to-speech-synthesis-2406.02009</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-enhanced-language-modeling-for-text-to-speech-synthesis-2406.02009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-enhanced-language-modeling-for-text-to-speech-synthesis-2406.02009"/></url>
<url><loc>https://scifaro.com/en/abs/m2d-clap-masked-modeling-duo-meets-clap-for-learning-general-purpose-audio-language-representation-2406.02032</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2d-clap-masked-modeling-duo-meets-clap-for-learning-general-purpose-audio-language-representation-2406.02032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2d-clap-masked-modeling-duo-meets-clap-for-learning-general-purpose-audio-language-representation-2406.02032"/></url>
<url><loc>https://scifaro.com/en/abs/simultron-on-device-simultaneous-speech-to-speech-translation-2406.02133</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultron-on-device-simultaneous-speech-to-speech-translation-2406.02133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultron-on-device-simultaneous-speech-to-speech-translation-2406.02133"/></url>
<url><loc>https://scifaro.com/en/abs/bivocoder-a-bidirectional-neural-vocoder-integrating-feature-extraction-and-waveform-generation-2406.02162</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bivocoder-a-bidirectional-neural-vocoder-integrating-feature-extraction-and-waveform-generation-2406.02162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bivocoder-a-bidirectional-neural-vocoder-integrating-feature-extraction-and-waveform-generation-2406.02162"/></url>
<url><loc>https://scifaro.com/en/abs/eres2netv2-boosting-short-duration-speaker-verification-performance-with-computational-efficiency-2406.02167</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eres2netv2-boosting-short-duration-speaker-verification-performance-with-computational-efficiency-2406.02167"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eres2netv2-boosting-short-duration-speaker-verification-performance-with-computational-efficiency-2406.02167"/></url>
<url><loc>https://scifaro.com/en/abs/towards-out-of-distribution-detection-in-vocoder-recognition-via-latent-feature-reconstruction-2406.02233</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-out-of-distribution-detection-in-vocoder-recognition-via-latent-feature-reconstruction-2406.02233"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-out-of-distribution-detection-in-vocoder-recognition-via-latent-feature-reconstruction-2406.02233"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stage-speech-bandwidth-extension-with-flexible-sampling-rate-control-2406.02250</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stage-speech-bandwidth-extension-with-flexible-sampling-rate-control-2406.02250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stage-speech-bandwidth-extension-with-flexible-sampling-rate-control-2406.02250"/></url>
<url><loc>https://scifaro.com/en/abs/midicaps-a-large-scale-midi-dataset-with-text-captions-2406.02255</loc><lastmod>2025-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/midicaps-a-large-scale-midi-dataset-with-text-captions-2406.02255"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/midicaps-a-large-scale-midi-dataset-with-text-captions-2406.02255"/></url>
<url><loc>https://scifaro.com/en/abs/towards-supervised-performance-on-speaker-verification-with-self-supervised-learning-by-leveraging-large-scale-asr-models-2406.02285</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-supervised-performance-on-speaker-verification-with-self-supervised-learning-by-leveraging-large-scale-asr-models-2406.02285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-supervised-performance-on-speaker-verification-with-self-supervised-learning-by-leveraging-large-scale-asr-models-2406.02285"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-singing-voice-pre-training-towards-speech-to-singing-conversion-2406.02429</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-singing-voice-pre-training-towards-speech-to-singing-conversion-2406.02429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-singing-voice-pre-training-towards-speech-to-singing-conversion-2406.02429"/></url>
<url><loc>https://scifaro.com/en/abs/seed-tts-a-family-of-high-quality-versatile-speech-generation-models-2406.02430</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seed-tts-a-family-of-high-quality-versatile-speech-generation-models-2406.02430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seed-tts-a-family-of-high-quality-versatile-speech-generation-models-2406.02430"/></url>
<url><loc>https://scifaro.com/en/abs/ctrsvdd-a-benchmark-dataset-and-baseline-analysis-for-controlled-singing-voice-deepfake-detection-2406.02438</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctrsvdd-a-benchmark-dataset-and-baseline-analysis-for-controlled-singing-voice-deepfake-detection-2406.02438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctrsvdd-a-benchmark-dataset-and-baseline-analysis-for-controlled-singing-voice-deepfake-detection-2406.02438"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-deep-learning-analysis-for-raga-identification-in-indian-art-music-2406.02443</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-deep-learning-analysis-for-raga-identification-in-indian-art-music-2406.02443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-deep-learning-analysis-for-raga-identification-in-indian-art-music-2406.02443"/></url>
<url><loc>https://scifaro.com/en/abs/how-do-neural-spoofing-countermeasures-detect-partially-spoofed-audio-2406.02483</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-do-neural-spoofing-countermeasures-detect-partially-spoofed-audio-2406.02483"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-do-neural-spoofing-countermeasures-detect-partially-spoofed-audio-2406.02483"/></url>
<url><loc>https://scifaro.com/en/abs/language-universal-speech-attributes-modeling-for-zero-shot-multilingual-spoken-keyword-recognition-2406.02488</loc><lastmod>2024-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-universal-speech-attributes-modeling-for-zero-shot-multilingual-spoken-keyword-recognition-2406.02488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-universal-speech-attributes-modeling-for-zero-shot-multilingual-spoken-keyword-recognition-2406.02488"/></url>
<url><loc>https://scifaro.com/en/abs/hear-me-see-me-understand-me-audio-visual-autism-behavior-recognition-2406.02554</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hear-me-see-me-understand-me-audio-visual-autism-behavior-recognition-2406.02554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hear-me-see-me-understand-me-audio-visual-autism-behavior-recognition-2406.02554"/></url>
<url><loc>https://scifaro.com/en/abs/phowhisper-automatic-speech-recognition-for-vietnamese-2406.02555</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phowhisper-automatic-speech-recognition-for-vietnamese-2406.02555"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phowhisper-automatic-speech-recognition-for-vietnamese-2406.02555"/></url>
<url><loc>https://scifaro.com/en/abs/less-peaky-and-more-accurate-ctc-forced-alignment-by-label-priors-2406.02560</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/less-peaky-and-more-accurate-ctc-forced-alignment-by-label-priors-2406.02560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/less-peaky-and-more-accurate-ctc-forced-alignment-by-label-priors-2406.02560"/></url>
<url><loc>https://scifaro.com/en/abs/breaking-walls-pioneering-automatic-speech-recognition-for-central-kurdish-end-to-end-transformer-paradigm-2406.02561</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breaking-walls-pioneering-automatic-speech-recognition-for-central-kurdish-end-to-end-transformer-paradigm-2406.02561"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breaking-walls-pioneering-automatic-speech-recognition-for-central-kurdish-end-to-end-transformer-paradigm-2406.02561"/></url>
<url><loc>https://scifaro.com/en/abs/gated-low-rank-adaptation-for-personalized-code-switching-automatic-speech-recognition-on-the-low-spec-devices-2406.02562</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gated-low-rank-adaptation-for-personalized-code-switching-automatic-speech-recognition-on-the-low-spec-devices-2406.02562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gated-low-rank-adaptation-for-personalized-code-switching-automatic-speech-recognition-on-the-low-spec-devices-2406.02562"/></url>
<url><loc>https://scifaro.com/en/abs/a-cost-minimization-approach-to-fix-the-vocabulary-size-in-a-tokenizer-for-an-end-to-end-asr-system-2406.02563</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-cost-minimization-approach-to-fix-the-vocabulary-size-in-a-tokenizer-for-an-end-to-end-asr-system-2406.02563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-cost-minimization-approach-to-fix-the-vocabulary-size-in-a-tokenizer-for-an-end-to-end-asr-system-2406.02563"/></url>
<url><loc>https://scifaro.com/en/abs/combining-x-vectors-and-bayesian-batch-active-learning-two-stage-active-learning-pipeline-for-speech-recognition-2406.02566</loc><lastmod>2026-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combining-x-vectors-and-bayesian-batch-active-learning-two-stage-active-learning-pipeline-for-speech-recognition-2406.02566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combining-x-vectors-and-bayesian-batch-active-learning-two-stage-active-learning-pipeline-for-speech-recognition-2406.02566"/></url>
<url><loc>https://scifaro.com/en/abs/cluster-to-predict-affect-contours-from-speech-2406.02569</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cluster-to-predict-affect-contours-from-speech-2406.02569"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cluster-to-predict-affect-contours-from-speech-2406.02569"/></url>
<url><loc>https://scifaro.com/en/abs/selfsupervised-learning-for-pathological-speech-detection-2406.02572</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selfsupervised-learning-for-pathological-speech-detection-2406.02572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selfsupervised-learning-for-pathological-speech-detection-2406.02572"/></url>
<url><loc>https://scifaro.com/en/abs/ppintonus-early-detection-of-parkinson-s-disease-using-deep-learning-tonal-analysis-2406.02608</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ppintonus-early-detection-of-parkinson-s-disease-using-deep-learning-tonal-analysis-2406.02608"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ppintonus-early-detection-of-parkinson-s-disease-using-deep-learning-tonal-analysis-2406.02608"/></url>
<url><loc>https://scifaro.com/en/abs/keyword-guided-adaptation-of-automatic-speech-recognition-2406.02649</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/keyword-guided-adaptation-of-automatic-speech-recognition-2406.02649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/keyword-guided-adaptation-of-automatic-speech-recognition-2406.02649"/></url>
<url><loc>https://scifaro.com/en/abs/repcnn-micro-sized-mighty-models-for-wakeword-detection-2406.02652</loc><lastmod>2024-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/repcnn-micro-sized-mighty-models-for-wakeword-detection-2406.02652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/repcnn-micro-sized-mighty-models-for-wakeword-detection-2406.02652"/></url>
<url><loc>https://scifaro.com/en/abs/conpco-preserving-phoneme-characteristics-for-automatic-pronunciation-assessment-leveraging-contrastive-ordinal-regularization-2406.02859</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conpco-preserving-phoneme-characteristics-for-automatic-pronunciation-assessment-leveraging-contrastive-ordinal-regularization-2406.02859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conpco-preserving-phoneme-characteristics-for-automatic-pronunciation-assessment-leveraging-contrastive-ordinal-regularization-2406.02859"/></url>
<url><loc>https://scifaro.com/en/abs/usm-rnn-t-model-weights-binarization-2406.02887</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usm-rnn-t-model-weights-binarization-2406.02887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usm-rnn-t-model-weights-binarization-2406.02887"/></url>
<url><loc>https://scifaro.com/en/abs/task-arithmetic-can-mitigate-synthetic-to-real-gap-in-automatic-speech-recognition-2406.02925</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-arithmetic-can-mitigate-synthetic-to-real-gap-in-automatic-speech-recognition-2406.02925"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-arithmetic-can-mitigate-synthetic-to-real-gap-in-automatic-speech-recognition-2406.02925"/></url>
<url><loc>https://scifaro.com/en/abs/joint-beam-search-integrating-ctc-attention-and-transducer-decoders-2406.02950</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-beam-search-integrating-ctc-attention-and-transducer-decoders-2406.02950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-beam-search-integrating-ctc-attention-and-transducer-decoders-2406.02950"/></url>
<url><loc>https://scifaro.com/en/abs/singing-voice-graph-modeling-for-singfake-detection-2406.03111</loc><lastmod>2025-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/singing-voice-graph-modeling-for-singfake-detection-2406.03111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/singing-voice-graph-modeling-for-singfake-detection-2406.03111"/></url>
<url><loc>https://scifaro.com/en/abs/revrir-joint-reverberant-speech-and-room-impulse-response-embedding-using-contrastive-learning-with-application-to-room-shape-classification-2406.03120</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revrir-joint-reverberant-speech-and-room-impulse-response-embedding-using-contrastive-learning-with-application-to-room-shape-classification-2406.03120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revrir-joint-reverberant-speech-and-room-impulse-response-embedding-using-contrastive-learning-with-application-to-room-shape-classification-2406.03120"/></url>
<url><loc>https://scifaro.com/en/abs/once-more-diarization-improving-meeting-transcription-systems-through-segment-level-speaker-reassignment-2406.03155</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/once-more-diarization-improving-meeting-transcription-systems-through-segment-level-speaker-reassignment-2406.03155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/once-more-diarization-improving-meeting-transcription-systems-through-segment-level-speaker-reassignment-2406.03155"/></url>
<url><loc>https://scifaro.com/en/abs/collab-a-collaborative-approach-for-multilingual-abuse-detection-2406.03205</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/collab-a-collaborative-approach-for-multilingual-abuse-detection-2406.03205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/collab-a-collaborative-approach-for-multilingual-abuse-detection-2406.03205"/></url>
<url><loc>https://scifaro.com/en/abs/reference-channel-selection-by-multi-channel-masking-for-end-to-end-multi-channel-speech-enhancement-2406.03228</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reference-channel-selection-by-multi-channel-masking-for-end-to-end-multi-channel-speech-enhancement-2406.03228"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reference-channel-selection-by-multi-channel-masking-for-end-to-end-multi-channel-speech-enhancement-2406.03228"/></url>
<url><loc>https://scifaro.com/en/abs/multi-microphone-speech-emotion-recognition-using-the-hierarchical-token-semantic-audio-transformer-architecture-2406.03272</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-microphone-speech-emotion-recognition-using-the-hierarchical-token-semantic-audio-transformer-architecture-2406.03272"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-microphone-speech-emotion-recognition-using-the-hierarchical-token-semantic-audio-transformer-architecture-2406.03272"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-ctc-based-speech-recognition-with-diverse-modeling-units-2406.03274</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-ctc-based-speech-recognition-with-diverse-modeling-units-2406.03274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-ctc-based-speech-recognition-with-diverse-modeling-units-2406.03274"/></url>
<url><loc>https://scifaro.com/en/abs/the-pesqetarian-on-the-relevance-of-goodhart-s-law-for-speech-enhancement-2406.03460</loc><lastmod>2024-06-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-pesqetarian-on-the-relevance-of-goodhart-s-law-for-speech-enhancement-2406.03460"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-pesqetarian-on-the-relevance-of-goodhart-s-law-for-speech-enhancement-2406.03460"/></url>
<url><loc>https://scifaro.com/en/abs/neuro-an-application-for-code-switched-autism-detection-in-children-2406.03514</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuro-an-application-for-code-switched-autism-detection-in-children-2406.03514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuro-an-application-for-code-switched-autism-detection-in-children-2406.03514"/></url>
<url><loc>https://scifaro.com/en/abs/style-mixture-of-experts-for-expressive-text-to-speech-synthesis-2406.03637</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/style-mixture-of-experts-for-expressive-text-to-speech-synthesis-2406.03637"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/style-mixture-of-experts-for-expressive-text-to-speech-synthesis-2406.03637"/></url>
<url><loc>https://scifaro.com/en/abs/urban-urban-beehive-acoustics-and-phenotyping-dataset-2406.03657</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/urban-urban-beehive-acoustics-and-phenotyping-dataset-2406.03657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/urban-urban-beehive-acoustics-and-phenotyping-dataset-2406.03657"/></url>
<url><loc>https://scifaro.com/en/abs/pldnet-pld-guided-lightweight-deep-network-boosted-by-efficient-attention-for-handheld-dual-microphone-speech-enhancement-2406.03899</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pldnet-pld-guided-lightweight-deep-network-boosted-by-efficient-attention-for-handheld-dual-microphone-speech-enhancement-2406.03899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pldnet-pld-guided-lightweight-deep-network-boosted-by-efficient-attention-for-handheld-dual-microphone-speech-enhancement-2406.03899"/></url>
<url><loc>https://scifaro.com/en/abs/helsinki-speech-challenge-2024-2406.04123</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/helsinki-speech-challenge-2024-2406.04123"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/helsinki-speech-challenge-2024-2406.04123"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-bounding-boxes-2406.04212</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-bounding-boxes-2406.04212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-bounding-boxes-2406.04212"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-performance-plateaus-a-comprehensive-study-on-scalability-in-speech-enhancement-2406.04269</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-performance-plateaus-a-comprehensive-study-on-scalability-in-speech-enhancement-2406.04269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-performance-plateaus-a-comprehensive-study-on-scalability-in-speech-enhancement-2406.04269"/></url>
<url><loc>https://scifaro.com/en/abs/total-duration-aware-duration-modeling-for-text-to-speech-systems-2406.04281</loc><lastmod>2024-06-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/total-duration-aware-duration-modeling-for-text-to-speech-systems-2406.04281"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/total-duration-aware-duration-modeling-for-text-to-speech-systems-2406.04281"/></url>
<url><loc>https://scifaro.com/en/abs/introducing-the-brand-new-qiandaoear22-dataset-for-specific-ship-identification-using-ship-radiated-noise-2406.04353</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/introducing-the-brand-new-qiandaoear22-dataset-for-specific-ship-identification-using-ship-radiated-noise-2406.04353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/introducing-the-brand-new-qiandaoear22-dataset-for-specific-ship-identification-using-ship-radiated-noise-2406.04353"/></url>
<url><loc>https://scifaro.com/en/abs/qiandaoear22-a-high-quality-noise-dataset-for-identifying-specific-ship-from-multiple-underwater-acoustic-targets-using-ship-radiated-noise-2406.04354</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qiandaoear22-a-high-quality-noise-dataset-for-identifying-specific-ship-from-multiple-underwater-acoustic-targets-using-ship-radiated-noise-2406.04354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qiandaoear22-a-high-quality-noise-dataset-for-identifying-specific-ship-from-multiple-underwater-acoustic-targets-using-ship-radiated-noise-2406.04354"/></url>
<url><loc>https://scifaro.com/en/abs/inagvad-a-challenging-french-tv-and-radio-corpus-annotated-for-speech-activity-detection-and-speaker-gender-segmentation-2406.04429</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inagvad-a-challenging-french-tv-and-radio-corpus-annotated-for-speech-activity-detection-and-speaker-gender-segmentation-2406.04429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inagvad-a-challenging-french-tv-and-radio-corpus-annotated-for-speech-activity-detection-and-speaker-gender-segmentation-2406.04429"/></url>
<url><loc>https://scifaro.com/en/abs/lipger-visually-conditioned-generative-error-correction-for-robust-automatic-speech-recognition-2406.04432</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipger-visually-conditioned-generative-error-correction-for-robust-automatic-speech-recognition-2406.04432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipger-visually-conditioned-generative-error-correction-for-robust-automatic-speech-recognition-2406.04432"/></url>
<url><loc>https://scifaro.com/en/abs/small-e-small-language-model-with-linear-attention-for-efficient-speech-synthesis-2406.04467</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/small-e-small-language-model-with-linear-attention-for-efficient-speech-synthesis-2406.04467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/small-e-small-language-model-with-linear-attention-for-efficient-speech-synthesis-2406.04467"/></url>
<url><loc>https://scifaro.com/en/abs/towards-naturalistic-voice-conversion-naturalvoices-dataset-with-an-automatic-processing-pipeline-2406.04494</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-naturalistic-voice-conversion-naturalvoices-dataset-with-an-automatic-processing-pipeline-2406.04494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-naturalistic-voice-conversion-naturalvoices-dataset-with-an-automatic-processing-pipeline-2406.04494"/></url>
<url><loc>https://scifaro.com/en/abs/flexible-multichannel-speech-enhancement-for-noise-robust-frontend-2406.04552</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexible-multichannel-speech-enhancement-for-noise-robust-frontend-2406.04552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexible-multichannel-speech-enhancement-for-noise-robust-frontend-2406.04552"/></url>
<url><loc>https://scifaro.com/en/abs/neural-codec-based-adversarial-sample-detection-for-speaker-verification-2406.04582</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-codec-based-adversarial-sample-detection-for-speaker-verification-2406.04582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-codec-based-adversarial-sample-detection-for-speaker-verification-2406.04582"/></url>
<url><loc>https://scifaro.com/en/abs/what-do-mllms-hear-examining-reasoning-with-text-and-sound-components-in-multimodal-large-language-models-2406.04615</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/what-do-mllms-hear-examining-reasoning-with-text-and-sound-components-in-multimodal-large-language-models-2406.04615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/what-do-mllms-hear-examining-reasoning-with-text-and-sound-components-in-multimodal-large-language-models-2406.04615"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-diffusion-model-for-spectrogram-up-sampling-in-text-to-speech-an-empirical-study-2406.04633</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-diffusion-model-for-spectrogram-up-sampling-in-text-to-speech-an-empirical-study-2406.04633"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-diffusion-model-for-spectrogram-up-sampling-in-text-to-speech-an-empirical-study-2406.04633"/></url>
<url><loc>https://scifaro.com/en/abs/urgent-challenge-universality-robustness-and-generalizability-for-speech-enhancement-2406.04660</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/urgent-challenge-universality-robustness-and-generalizability-for-speech-enhancement-2406.04660"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/urgent-challenge-universality-robustness-and-generalizability-for-speech-enhancement-2406.04660"/></url>
<url><loc>https://scifaro.com/en/abs/xtts-a-massively-multilingual-zero-shot-text-to-speech-model-2406.04904</loc><lastmod>2024-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xtts-a-massively-multilingual-zero-shot-text-to-speech-model-2406.04904"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xtts-a-massively-multilingual-zero-shot-text-to-speech-model-2406.04904"/></url>
<url><loc>https://scifaro.com/en/abs/llm-based-speaker-diarization-correction-a-generalizable-approach-2406.04927</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llm-based-speaker-diarization-correction-a-generalizable-approach-2406.04927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llm-based-speaker-diarization-correction-a-generalizable-approach-2406.04927"/></url>
<url><loc>https://scifaro.com/en/abs/the-database-and-benchmark-for-the-source-speaker-tracing-challenge-2024-2406.04951</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-database-and-benchmark-for-the-source-speaker-tracing-challenge-2024-2406.04951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-database-and-benchmark-for-the-source-speaker-tracing-challenge-2024-2406.04951"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-social-bias-of-speech-self-supervised-models-2406.04997</loc><lastmod>2026-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-social-bias-of-speech-self-supervised-models-2406.04997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-social-bias-of-speech-self-supervised-models-2406.04997"/></url>
<url><loc>https://scifaro.com/en/abs/emo-bias-a-large-scale-evaluation-of-social-bias-on-speech-emotion-recognition-2406.05065</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-bias-a-large-scale-evaluation-of-social-bias-on-speech-emotion-recognition-2406.05065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-bias-a-large-scale-evaluation-of-social-bias-on-speech-emotion-recognition-2406.05065"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-time-varying-linear-prediction-in-the-context-of-end-to-end-analysis-by-synthesis-2406.05128</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-time-varying-linear-prediction-in-the-context-of-end-to-end-analysis-by-synthesis-2406.05128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-time-varying-linear-prediction-in-the-context-of-end-to-end-analysis-by-synthesis-2406.05128"/></url>
<url><loc>https://scifaro.com/en/abs/xane-explainable-acoustic-neural-embeddings-2406.05199</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xane-explainable-acoustic-neural-embeddings-2406.05199"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xane-explainable-acoustic-neural-embeddings-2406.05199"/></url>
<url><loc>https://scifaro.com/en/abs/a-model-of-early-word-acquisition-based-on-realistic-scale-audiovisual-naming-events-2406.05259</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-model-of-early-word-acquisition-based-on-realistic-scale-audiovisual-naming-events-2406.05259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-model-of-early-word-acquisition-based-on-realistic-scale-audiovisual-naming-events-2406.05259"/></url>
<url><loc>https://scifaro.com/en/abs/signal-processing-algorithm-effective-for-sound-quality-of-hearing-loss-simulators-2406.05286</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/signal-processing-algorithm-effective-for-sound-quality-of-hearing-loss-simulators-2406.05286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/signal-processing-algorithm-effective-for-sound-quality-of-hearing-loss-simulators-2406.05286"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-codecs-improving-non-autoregressive-speech-synthesis-with-spectrogram-based-audio-codecs-2406.05298</loc><lastmod>2025-06-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-codecs-improving-non-autoregressive-speech-synthesis-with-spectrogram-based-audio-codecs-2406.05298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-codecs-improving-non-autoregressive-speech-synthesis-with-spectrogram-based-audio-codecs-2406.05298"/></url>
<url><loc>https://scifaro.com/en/abs/relational-proxy-loss-for-audio-text-based-keyword-spotting-2406.05314</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/relational-proxy-loss-for-audio-text-based-keyword-spotting-2406.05314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/relational-proxy-loss-for-audio-text-based-keyword-spotting-2406.05314"/></url>
<url><loc>https://scifaro.com/en/abs/ldm-svc-latent-diffusion-model-based-zero-shot-any-to-any-singing-voice-conversion-with-singer-guidance-2406.05325</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ldm-svc-latent-diffusion-model-based-zero-shot-any-to-any-singing-voice-conversion-with-singer-guidance-2406.05325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ldm-svc-latent-diffusion-model-based-zero-shot-any-to-any-singing-voice-conversion-with-singer-guidance-2406.05325"/></url>
<url><loc>https://scifaro.com/en/abs/to-what-extent-can-asv-systems-naturally-defend-against-spoofing-attacks-2406.05339</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/to-what-extent-can-asv-systems-naturally-defend-against-spoofing-attacks-2406.05339"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/to-what-extent-can-asv-systems-naturally-defend-against-spoofing-attacks-2406.05339"/></url>
<url><loc>https://scifaro.com/en/abs/diversifying-and-expanding-frequency-adaptive-convolution-kernels-for-sound-event-detection-2406.05341</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diversifying-and-expanding-frequency-adaptive-convolution-kernels-for-sound-event-detection-2406.05341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diversifying-and-expanding-frequency-adaptive-convolution-kernels-for-sound-event-detection-2406.05341"/></url>
<url><loc>https://scifaro.com/en/abs/towards-lightweight-speaker-verification-via-adaptive-neural-network-quantization-2406.05359</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-lightweight-speaker-verification-via-adaptive-neural-network-quantization-2406.05359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-lightweight-speaker-verification-via-adaptive-neural-network-quantization-2406.05359"/></url>
<url><loc>https://scifaro.com/en/abs/should-you-use-a-probabilistic-duration-model-in-tts-probably-especially-for-spontaneous-speech-2406.05401</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/should-you-use-a-probabilistic-duration-model-in-tts-probably-especially-for-spontaneous-speech-2406.05401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/should-you-use-a-probabilistic-duration-model-in-tts-probably-especially-for-spontaneous-speech-2406.05401"/></url>
<url><loc>https://scifaro.com/en/abs/autoregressive-diffusion-transformer-for-text-to-speech-synthesis-2406.05551</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/autoregressive-diffusion-transformer-for-text-to-speech-synthesis-2406.05551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/autoregressive-diffusion-transformer-for-text-to-speech-synthesis-2406.05551"/></url>
<url><loc>https://scifaro.com/en/abs/text-aware-and-context-aware-expressive-audiobook-speech-synthesis-2406.05672</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-aware-and-context-aware-expressive-audiobook-speech-synthesis-2406.05672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-aware-and-context-aware-expressive-audiobook-speech-synthesis-2406.05672"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-noise-robustness-for-flow-matching-based-zero-shot-tts-2406.05699</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-noise-robustness-for-flow-matching-based-zero-shot-tts-2406.05699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-noise-robustness-for-flow-matching-based-zero-shot-tts-2406.05699"/></url>
<url><loc>https://scifaro.com/en/abs/wenetspeech4tts-a-12-800-hour-mandarin-tts-corpus-for-large-speech-generation-model-benchmark-2406.05763</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wenetspeech4tts-a-12-800-hour-mandarin-tts-corpus-for-large-speech-generation-model-benchmark-2406.05763"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wenetspeech4tts-a-12-800-hour-mandarin-tts-corpus-for-large-speech-generation-model-benchmark-2406.05763"/></url>
<url><loc>https://scifaro.com/en/abs/mala-asr-multimedia-assisted-llm-based-asr-2406.05839</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mala-asr-multimedia-assisted-llm-based-asr-2406.05839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mala-asr-multimedia-assisted-llm-based-asr-2406.05839"/></url>
<url><loc>https://scifaro.com/en/abs/soundscape-captioning-using-sound-affective-quality-network-and-large-language-model-2406.05914</loc><lastmod>2025-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundscape-captioning-using-sound-affective-quality-network-and-large-language-model-2406.05914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundscape-captioning-using-sound-affective-quality-network-and-large-language-model-2406.05914"/></url>
<url><loc>https://scifaro.com/en/abs/accent-conversion-with-articulatory-representations-2406.05947</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accent-conversion-with-articulatory-representations-2406.05947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accent-conversion-with-articulatory-representations-2406.05947"/></url>
<url><loc>https://scifaro.com/en/abs/bs-plcnet-2-two-stage-band-split-packet-loss-concealment-network-with-intra-model-knowledge-distillation-2406.05961</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bs-plcnet-2-two-stage-band-split-packet-loss-concealment-network-with-intra-model-knowledge-distillation-2406.05961"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bs-plcnet-2-two-stage-band-split-packet-loss-concealment-network-with-intra-model-knowledge-distillation-2406.05961"/></url>
<url><loc>https://scifaro.com/en/abs/makesinger-a-semi-supervised-training-method-for-data-efficient-singing-voice-synthesis-via-classifier-free-diffusion-guidance-2406.05965</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/makesinger-a-semi-supervised-training-method-for-data-efficient-singing-voice-synthesis-via-classifier-free-diffusion-guidance-2406.05965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/makesinger-a-semi-supervised-training-method-for-data-efficient-singing-voice-synthesis-via-classifier-free-diffusion-guidance-2406.05965"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-large-language-models-with-audio-for-general-purpose-speech-summarization-2406.05968</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-large-language-models-with-audio-for-general-purpose-speech-summarization-2406.05968"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-large-language-models-with-audio-for-general-purpose-speech-summarization-2406.05968"/></url>
<url><loc>https://scifaro.com/en/abs/separate-and-reconstruct-asymmetric-encoder-decoder-for-speech-separation-2406.05983</loc><lastmod>2026-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-and-reconstruct-asymmetric-encoder-decoder-for-speech-separation-2406.05983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-and-reconstruct-asymmetric-encoder-decoder-for-speech-separation-2406.05983"/></url>
<url><loc>https://scifaro.com/en/abs/jengan-stacked-shifted-filters-in-gan-based-speech-synthesis-2406.06111</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jengan-stacked-shifted-filters-in-gan-based-speech-synthesis-2406.06111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jengan-stacked-shifted-filters-in-gan-based-speech-synthesis-2406.06111"/></url>
<url><loc>https://scifaro.com/en/abs/the-effect-of-training-dataset-size-on-discriminative-and-diffusion-based-speech-enhancement-systems-2406.06160</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-effect-of-training-dataset-size-on-discriminative-and-diffusion-based-speech-enhancement-systems-2406.06160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-effect-of-training-dataset-size-on-discriminative-and-diffusion-based-speech-enhancement-systems-2406.06160"/></url>
<url><loc>https://scifaro.com/en/abs/ears-an-anechoic-fullband-speech-dataset-benchmarked-for-speech-enhancement-and-dereverberation-2406.06185</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ears-an-anechoic-fullband-speech-dataset-benchmarked-for-speech-enhancement-and-dereverberation-2406.06185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ears-an-anechoic-fullband-speech-dataset-benchmarked-for-speech-enhancement-and-dereverberation-2406.06185"/></url>
<url><loc>https://scifaro.com/en/abs/label-looping-highly-efficient-decoding-for-transducers-2406.06220</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/label-looping-highly-efficient-decoding-for-transducers-2406.06220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/label-looping-highly-efficient-decoding-for-transducers-2406.06220"/></url>
<url><loc>https://scifaro.com/en/abs/learning-fine-grained-controllability-on-speech-generation-via-efficient-fine-tuning-2406.06251</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-fine-grained-controllability-on-speech-generation-via-efficient-fine-tuning-2406.06251"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-fine-grained-controllability-on-speech-generation-via-efficient-fine-tuning-2406.06251"/></url>
<url><loc>https://scifaro.com/en/abs/sample-rate-independent-recurrent-neural-networks-for-audio-effects-processing-2406.06293</loc><lastmod>2024-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sample-rate-independent-recurrent-neural-networks-for-audio-effects-processing-2406.06293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sample-rate-independent-recurrent-neural-networks-for-audio-effects-processing-2406.06293"/></url>
<url><loc>https://scifaro.com/en/abs/lora-whisper-parameter-efficient-and-extensible-multilingual-asr-2406.06619</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lora-whisper-parameter-efficient-and-extensible-multilingual-asr-2406.06619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lora-whisper-parameter-efficient-and-extensible-multilingual-asr-2406.06619"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-binarization-for-fast-keyword-spotting-2406.06634</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-binarization-for-fast-keyword-spotting-2406.06634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-binarization-for-fast-keyword-spotting-2406.06634"/></url>
<url><loc>https://scifaro.com/en/abs/emotion-aware-speech-self-supervised-representation-learning-with-intensity-knowledge-2406.06646</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotion-aware-speech-self-supervised-representation-learning-with-intensity-knowledge-2406.06646"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotion-aware-speech-self-supervised-representation-learning-with-intensity-knowledge-2406.06646"/></url>
<url><loc>https://scifaro.com/en/abs/astra-aligning-speech-and-text-representations-for-asr-without-sampling-2406.06664</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/astra-aligning-speech-and-text-representations-for-asr-without-sampling-2406.06664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/astra-aligning-speech-and-text-representations-for-asr-without-sampling-2406.06664"/></url>
<url><loc>https://scifaro.com/en/abs/comfeat-combination-of-neural-and-spectral-features-for-improved-depression-detection-2406.06774</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comfeat-combination-of-neural-and-spectral-features-for-improved-depression-detection-2406.06774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comfeat-combination-of-neural-and-spectral-features-for-improved-depression-detection-2406.06774"/></url>
<url><loc>https://scifaro.com/en/abs/persona-an-application-for-emotion-recognition-gender-recognition-and-age-estimation-2406.06781</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/persona-an-application-for-emotion-recognition-gender-recognition-and-age-estimation-2406.06781"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/persona-an-application-for-emotion-recognition-gender-recognition-and-age-estimation-2406.06781"/></url>
<url><loc>https://scifaro.com/en/abs/the-reasonable-effectiveness-of-speaker-embeddings-for-violence-detection-2406.06798</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-reasonable-effectiveness-of-speaker-embeddings-for-violence-detection-2406.06798"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-reasonable-effectiveness-of-speaker-embeddings-for-violence-detection-2406.06798"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-language-corpora-augmentation-with-domain-specific-voice-cloned-speech-2406.07090</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-language-corpora-augmentation-with-domain-specific-voice-cloned-speech-2406.07090"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-language-corpora-augmentation-with-domain-specific-voice-cloned-speech-2406.07090"/></url>
<url><loc>https://scifaro.com/en/abs/fast-context-biasing-for-ctc-and-transducer-asr-models-with-ctc-based-word-spotter-2406.07096</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-context-biasing-for-ctc-and-transducer-asr-models-with-ctc-based-word-spotter-2406.07096"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-context-biasing-for-ctc-and-transducer-asr-models-with-ctc-based-word-spotter-2406.07096"/></url>
<url><loc>https://scifaro.com/en/abs/mr-rawnet-speaker-verification-system-with-multiple-temporal-resolutions-for-variable-duration-utterances-using-raw-waveforms-2406.07103</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mr-rawnet-speaker-verification-system-with-multiple-temporal-resolutions-for-variable-duration-utterances-using-raw-waveforms-2406.07103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mr-rawnet-speaker-verification-system-with-multiple-temporal-resolutions-for-variable-duration-utterances-using-raw-waveforms-2406.07103"/></url>
<url><loc>https://scifaro.com/en/abs/translating-speech-with-just-images-2406.07133</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/translating-speech-with-just-images-2406.07133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/translating-speech-with-just-images-2406.07133"/></url>
<url><loc>https://scifaro.com/en/abs/target-speech-diarization-with-multimodal-prompts-2406.07198</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speech-diarization-with-multimodal-prompts-2406.07198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speech-diarization-with-multimodal-prompts-2406.07198"/></url>
<url><loc>https://scifaro.com/en/abs/codecfake-enhancing-anti-spoofing-models-against-deepfake-audios-from-codec-based-speech-synthesis-systems-2406.07237</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codecfake-enhancing-anti-spoofing-models-against-deepfake-audios-from-codec-based-speech-synthesis-systems-2406.07237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codecfake-enhancing-anti-spoofing-models-against-deepfake-audios-from-codec-based-speech-synthesis-systems-2406.07237"/></url>
<url><loc>https://scifaro.com/en/abs/description-and-discussion-on-dcase-2024-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2406.07250</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2024-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2406.07250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-and-discussion-on-dcase-2024-challenge-task-2-first-shot-unsupervised-anomalous-sound-detection-for-machine-condition-monitoring-2406.07250"/></url>
<url><loc>https://scifaro.com/en/abs/mm-kws-multi-modal-prompts-for-multilingual-user-defined-keyword-spotting-2406.07310</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mm-kws-multi-modal-prompts-for-multilingual-user-defined-keyword-spotting-2406.07310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mm-kws-multi-modal-prompts-for-multilingual-user-defined-keyword-spotting-2406.07310"/></url>
<url><loc>https://scifaro.com/en/abs/clever-hans-effect-found-in-automatic-detection-of-alzheimer-s-disease-through-speech-2406.07410</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clever-hans-effect-found-in-automatic-detection-of-alzheimer-s-disease-through-speech-2406.07410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clever-hans-effect-found-in-automatic-detection-of-alzheimer-s-disease-through-speech-2406.07410"/></url>
<url><loc>https://scifaro.com/en/abs/single-codec-single-codebook-speech-codec-towards-high-performance-speech-generation-2406.07422</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-codec-single-codebook-speech-codec-towards-high-performance-speech-generation-2406.07422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-codec-single-codebook-speech-codec-towards-high-performance-speech-generation-2406.07422"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-speech-separation-with-fast-generative-correction-2406.07461</loc><lastmod>2024-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-speech-separation-with-fast-generative-correction-2406.07461"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-speech-separation-with-fast-generative-correction-2406.07461"/></url>
<url><loc>https://scifaro.com/en/abs/towards-objective-and-interpretable-speech-disorder-assessment-a-comparative-analysis-of-cnn-and-transformer-based-models-2406.07576</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-objective-and-interpretable-speech-disorder-assessment-a-comparative-analysis-of-cnn-and-transformer-based-models-2406.07576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-objective-and-interpretable-speech-disorder-assessment-a-comparative-analysis-of-cnn-and-transformer-based-models-2406.07576"/></url>
<url><loc>https://scifaro.com/en/abs/spoof-diarization-what-spoofed-when-in-partially-spoofed-audio-2406.07816</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoof-diarization-what-spoofed-when-in-partially-spoofed-audio-2406.07816"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoof-diarization-what-spoofed-when-in-partially-spoofed-audio-2406.07816"/></url>
<url><loc>https://scifaro.com/en/abs/dual-pipeline-with-low-rank-adaptation-for-new-language-integration-in-multilingual-asr-2406.07842</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dual-pipeline-with-low-rank-adaptation-for-new-language-integration-in-multilingual-asr-2406.07842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dual-pipeline-with-low-rank-adaptation-for-new-language-integration-in-multilingual-asr-2406.07842"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-extraction-with-curriculum-learning-2406.07845</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-extraction-with-curriculum-learning-2406.07845"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-extraction-with-curriculum-learning-2406.07845"/></url>
<url><loc>https://scifaro.com/en/abs/dualvc-3-leveraging-language-model-generated-pseudo-context-for-end-to-end-low-latency-streaming-voice-conversion-2406.07846</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualvc-3-leveraging-language-model-generated-pseudo-context-for-end-to-end-low-latency-streaming-voice-conversion-2406.07846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualvc-3-leveraging-language-model-generated-pseudo-context-for-end-to-end-low-latency-streaming-voice-conversion-2406.07846"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-speech-foundation-models-for-speaker-diarization-in-child-adult-dyadic-interactions-2406.07890</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-speech-foundation-models-for-speaker-diarization-in-child-adult-dyadic-interactions-2406.07890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-speech-foundation-models-for-speaker-diarization-in-child-adult-dyadic-interactions-2406.07890"/></url>
<url><loc>https://scifaro.com/en/abs/guiding-frame-level-ctc-alignments-using-self-knowledge-distillation-2406.07909</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guiding-frame-level-ctc-alignments-using-self-knowledge-distillation-2406.07909"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guiding-frame-level-ctc-alignments-using-self-knowledge-distillation-2406.07909"/></url>
<url><loc>https://scifaro.com/en/abs/libritts-p-a-corpus-with-speaking-style-and-speaker-identity-prompts-for-text-to-speech-and-style-captioning-2406.07969</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/libritts-p-a-corpus-with-speaking-style-and-speaker-identity-prompts-for-text-to-speech-and-style-captioning-2406.07969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/libritts-p-a-corpus-with-speaking-style-and-speaker-identity-prompts-for-text-to-speech-and-style-captioning-2406.07969"/></url>
<url><loc>https://scifaro.com/en/abs/dcase-2024-task-4-sound-event-detection-with-heterogeneous-data-and-missing-labels-2406.08056</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcase-2024-task-4-sound-event-detection-with-heterogeneous-data-and-missing-labels-2406.08056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcase-2024-task-4-sound-event-detection-with-heterogeneous-data-and-missing-labels-2406.08056"/></url>
<url><loc>https://scifaro.com/en/abs/vecl-tts-voice-identity-and-emotional-style-controllable-cross-lingual-text-to-speech-2406.08076</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vecl-tts-voice-identity-and-emotional-style-controllable-cross-lingual-text-to-speech-2406.08076"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vecl-tts-voice-identity-and-emotional-style-controllable-cross-lingual-text-to-speech-2406.08076"/></url>
<url><loc>https://scifaro.com/en/abs/audio-conditioned-phonemic-and-prosodic-annotation-for-building-text-to-speech-models-from-unlabeled-speech-data-2406.08111</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-conditioned-phonemic-and-prosodic-annotation-for-building-text-to-speech-models-from-unlabeled-speech-data-2406.08111"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-conditioned-phonemic-and-prosodic-annotation-for-building-text-to-speech-models-from-unlabeled-speech-data-2406.08111"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-parallel-attention-convolution-network-2406.08119</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-parallel-attention-convolution-network-2406.08119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-using-parallel-attention-convolution-network-2406.08119"/></url>
<url><loc>https://scifaro.com/en/abs/fully-few-shot-class-incremental-audio-classification-using-expandable-dual-embedding-extractor-2406.08122</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fully-few-shot-class-incremental-audio-classification-using-expandable-dual-embedding-extractor-2406.08122"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fully-few-shot-class-incremental-audio-classification-using-expandable-dual-embedding-extractor-2406.08122"/></url>
<url><loc>https://scifaro.com/en/abs/lafma-a-latent-flow-matching-model-for-text-to-audio-generation-2406.08203</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lafma-a-latent-flow-matching-model-for-text-to-audio-generation-2406.08203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lafma-a-latent-flow-matching-model-for-text-to-audio-generation-2406.08203"/></url>
<url><loc>https://scifaro.com/en/abs/transformer-based-model-for-asr-n-best-rescoring-and-rewriting-2406.08207</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transformer-based-model-for-asr-n-best-rescoring-and-rewriting-2406.08207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transformer-based-model-for-asr-n-best-rescoring-and-rewriting-2406.08207"/></url>
<url><loc>https://scifaro.com/en/abs/refining-self-supervised-learnt-speech-representation-using-brain-activations-2406.08266</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refining-self-supervised-learnt-speech-representation-using-brain-activations-2406.08266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refining-self-supervised-learnt-speech-representation-using-brain-activations-2406.08266"/></url>
<url><loc>https://scifaro.com/en/abs/multimodal-representation-loss-between-timed-text-and-audio-for-regularized-speech-separation-2406.08328</loc><lastmod>2025-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multimodal-representation-loss-between-timed-text-and-audio-for-regularized-speech-separation-2406.08328"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multimodal-representation-loss-between-timed-text-and-audio-for-regularized-speech-separation-2406.08328"/></url>
<url><loc>https://scifaro.com/en/abs/speech-emotion-recognition-with-asr-transcripts-a-comprehensive-study-on-word-error-rate-and-fusion-techniques-2406.08353</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-asr-transcripts-a-comprehensive-study-on-word-error-rate-and-fusion-techniques-2406.08353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-emotion-recognition-with-asr-transcripts-a-comprehensive-study-on-word-error-rate-and-fusion-techniques-2406.08353"/></url>
<url><loc>https://scifaro.com/en/abs/scdnet-self-supervised-learning-feature-based-speaker-change-detection-2406.08393</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scdnet-self-supervised-learning-feature-based-speaker-change-detection-2406.08393"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scdnet-self-supervised-learning-feature-based-speaker-change-detection-2406.08393"/></url>
<url><loc>https://scifaro.com/en/abs/neural-blind-source-separation-and-diarization-for-distant-speech-recognition-2406.08396</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-blind-source-separation-and-diarization-for-distant-speech-recognition-2406.08396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-blind-source-separation-and-diarization-for-distant-speech-recognition-2406.08396"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-sounds-missing-the-questions-the-challenge-of-object-hallucination-in-large-audio-language-models-2406.08402</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-sounds-missing-the-questions-the-challenge-of-object-hallucination-in-large-audio-language-models-2406.08402"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-sounds-missing-the-questions-the-challenge-of-object-hallucination-in-large-audio-language-models-2406.08402"/></url>
<url><loc>https://scifaro.com/en/abs/svsnet-enhancing-speaker-voice-similarity-assessment-models-with-representations-from-speech-foundation-models-2406.08445</loc><lastmod>2024-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svsnet-enhancing-speaker-voice-similarity-assessment-models-with-representations-from-speech-foundation-models-2406.08445"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svsnet-enhancing-speaker-voice-similarity-assessment-models-with-representations-from-speech-foundation-models-2406.08445"/></url>
<url><loc>https://scifaro.com/en/abs/db3v-a-dialect-dominated-dataset-of-bird-vocalisation-for-cross-corpus-bird-species-recognition-2406.08517</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/db3v-a-dialect-dominated-dataset-of-bird-vocalisation-for-cross-corpus-bird-species-recognition-2406.08517"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/db3v-a-dialect-dominated-dataset-of-bird-vocalisation-for-cross-corpus-bird-species-recognition-2406.08517"/></url>
<url><loc>https://scifaro.com/en/abs/dubwise-video-guided-speech-duration-control-in-multimodal-llm-based-text-to-speech-for-dubbing-2406.08802</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dubwise-video-guided-speech-duration-control-in-multimodal-llm-based-text-to-speech-for-dubbing-2406.08802"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dubwise-video-guided-speech-duration-control-in-multimodal-llm-based-text-to-speech-for-dubbing-2406.08802"/></url>
<url><loc>https://scifaro.com/en/abs/disfluencyspeech-single-speaker-conversational-speech-dataset-with-paralanguage-2406.08820</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disfluencyspeech-single-speaker-conversational-speech-dataset-with-paralanguage-2406.08820"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disfluencyspeech-single-speaker-conversational-speech-dataset-with-paralanguage-2406.08820"/></url>
<url><loc>https://scifaro.com/en/abs/on-improving-error-resilience-of-neural-end-to-end-speech-coders-2406.08900</loc><lastmod>2025-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-improving-error-resilience-of-neural-end-to-end-speech-coders-2406.08900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-improving-error-resilience-of-neural-end-to-end-speech-coders-2406.08900"/></url>
<url><loc>https://scifaro.com/en/abs/tool-wear-prediction-in-cnc-turning-operations-using-ultrasonic-microphone-arrays-and-cnns-2406.08957</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tool-wear-prediction-in-cnc-turning-operations-using-ultrasonic-microphone-arrays-and-cnns-2406.08957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tool-wear-prediction-in-cnc-turning-operations-using-ultrasonic-microphone-arrays-and-cnns-2406.08957"/></url>
<url><loc>https://scifaro.com/en/abs/cascaded-noise-reduction-and-acoustic-echo-cancellation-based-on-an-extended-noise-reduction-2406.08974</loc><lastmod>2024-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cascaded-noise-reduction-and-acoustic-echo-cancellation-based-on-an-extended-noise-reduction-2406.08974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cascaded-noise-reduction-and-acoustic-echo-cancellation-based-on-an-extended-noise-reduction-2406.08974"/></url>
<url><loc>https://scifaro.com/en/abs/toneunit-a-speech-discretization-approach-for-tonal-language-speech-synthesis-2406.08989</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toneunit-a-speech-discretization-approach-for-tonal-language-speech-synthesis-2406.08989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toneunit-a-speech-discretization-approach-for-tonal-language-speech-synthesis-2406.08989"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-streaming-model-for-low-latency-speech-anonymization-2406.09277</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-streaming-model-for-low-latency-speech-anonymization-2406.09277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-streaming-model-for-low-latency-speech-anonymization-2406.09277"/></url>
<url><loc>https://scifaro.com/en/abs/flowavse-efficient-audio-visual-speech-enhancement-with-conditional-flow-matching-2406.09286</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowavse-efficient-audio-visual-speech-enhancement-with-conditional-flow-matching-2406.09286"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowavse-efficient-audio-visual-speech-enhancement-with-conditional-flow-matching-2406.09286"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-spoken-language-identification-strategies-for-automatic-transcription-of-multilingual-broadcast-and-institutional-speech-2406.09290</loc><lastmod>2024-06-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-spoken-language-identification-strategies-for-automatic-transcription-of-multilingual-broadcast-and-institutional-speech-2406.09290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-spoken-language-identification-strategies-for-automatic-transcription-of-multilingual-broadcast-and-institutional-speech-2406.09290"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-personalized-voice-activity-detection-systems-assessing-real-world-effectiveness-2406.09443</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-personalized-voice-activity-detection-systems-assessing-real-world-effectiveness-2406.09443"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-personalized-voice-activity-detection-systems-assessing-real-world-effectiveness-2406.09443"/></url>
<url><loc>https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-an-autoregressive-generative-model-2406.09444</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-an-autoregressive-generative-model-2406.09444"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gendistiller-distilling-pre-trained-language-models-based-on-an-autoregressive-generative-model-2406.09444"/></url>
<url><loc>https://scifaro.com/en/abs/the-second-displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2406.09494</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-second-displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2406.09494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-second-displace-challenge-diarization-of-speaker-and-language-in-conversational-environments-2406.09494"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-target-speaker-s-solo-segment-2406.09589</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-target-speaker-s-solo-segment-2406.09589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-multi-speaker-asr-using-target-speaker-s-solo-segment-2406.09589"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-personalization-of-amplification-in-hearing-aids-via-multi-band-bayesian-machine-learning-2406.09634</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-personalization-of-amplification-in-hearing-aids-via-multi-band-bayesian-machine-learning-2406.09634"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-personalization-of-amplification-in-hearing-aids-via-multi-band-bayesian-machine-learning-2406.09634"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-byte-level-representation-for-end-to-end-asr-2406.09676</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-byte-level-representation-for-end-to-end-asr-2406.09676"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-byte-level-representation-for-end-to-end-asr-2406.09676"/></url>
<url><loc>https://scifaro.com/en/abs/a-multimodal-framework-for-the-assessment-of-the-schizophrenia-spectrum-2406.09706</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-multimodal-framework-for-the-assessment-of-the-schizophrenia-spectrum-2406.09706"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-multimodal-framework-for-the-assessment-of-the-schizophrenia-spectrum-2406.09706"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-deep-speech-separation-in-clustered-ad-hoc-distributed-microphone-environments-2406.09819</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-deep-speech-separation-in-clustered-ad-hoc-distributed-microphone-environments-2406.09819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-deep-speech-separation-in-clustered-ad-hoc-distributed-microphone-environments-2406.09819"/></url>
<url><loc>https://scifaro.com/en/abs/low-algorithmic-delay-implementation-of-convolutional-beamformer-for-online-joint-source-separation-and-dereverberation-2406.09821</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-algorithmic-delay-implementation-of-convolutional-beamformer-for-online-joint-source-separation-and-dereverberation-2406.09821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-algorithmic-delay-implementation-of-convolutional-beamformer-for-online-joint-source-separation-and-dereverberation-2406.09821"/></url>
<url><loc>https://scifaro.com/en/abs/perceiver-prompt-flexible-speaker-adaptation-in-whisper-for-chinese-disordered-speech-recognition-2406.09873</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceiver-prompt-flexible-speaker-adaptation-in-whisper-for-chinese-disordered-speech-recognition-2406.09873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceiver-prompt-flexible-speaker-adaptation-in-whisper-for-chinese-disordered-speech-recognition-2406.09873"/></url>
<url><loc>https://scifaro.com/en/abs/period-singer-integrating-periodic-and-aperiodic-variational-autoencoders-for-natural-sounding-end-to-end-singing-voice-synthesis-2406.09894</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/period-singer-integrating-periodic-and-aperiodic-variational-autoencoders-for-natural-sounding-end-to-end-singing-voice-synthesis-2406.09894"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/period-singer-integrating-periodic-and-aperiodic-variational-autoencoders-for-natural-sounding-end-to-end-singing-voice-synthesis-2406.09894"/></url>
<url><loc>https://scifaro.com/en/abs/understanding-pedestrian-movement-using-urban-sensing-technologies-the-promise-of-audio-based-sensors-2406.09998</loc><lastmod>2025-08-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/understanding-pedestrian-movement-using-urban-sensing-technologies-the-promise-of-audio-based-sensors-2406.09998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/understanding-pedestrian-movement-using-urban-sensing-technologies-the-promise-of-audio-based-sensors-2406.09998"/></url>
<url><loc>https://scifaro.com/en/abs/roar-reinforcing-original-to-augmented-data-ratio-dynamics-for-wav2vec2-0-based-asr-2406.09999</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/roar-reinforcing-original-to-augmented-data-ratio-dynamics-for-wav2vec2-0-based-asr-2406.09999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/roar-reinforcing-original-to-augmented-data-ratio-dynamics-for-wav2vec2-0-based-asr-2406.09999"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-the-terminality-of-speech-turn-boundary-for-spoken-interactions-in-french-tv-and-radio-content-2406.10073</loc><lastmod>2024-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-the-terminality-of-speech-turn-boundary-for-spoken-interactions-in-french-tv-and-radio-content-2406.10073"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-the-terminality-of-speech-turn-boundary-for-spoken-interactions-in-french-tv-and-radio-content-2406.10073"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-flamingo-integrating-visual-features-into-whisper-for-audio-visual-speech-recognition-and-translation-2406.10082</loc><lastmod>2024-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-flamingo-integrating-visual-features-into-whisper-for-audio-visual-speech-recognition-and-translation-2406.10082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-flamingo-integrating-visual-features-into-whisper-for-audio-visual-speech-recognition-and-translation-2406.10082"/></url>
<url><loc>https://scifaro.com/en/abs/inclusive-asr-for-disfluent-speech-cascaded-large-scale-self-supervised-learning-with-targeted-fine-tuning-and-data-augmentation-2406.10177</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inclusive-asr-for-disfluent-speech-cascaded-large-scale-self-supervised-learning-with-targeted-fine-tuning-and-data-augmentation-2406.10177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inclusive-asr-for-disfluent-speech-cascaded-large-scale-self-supervised-learning-with-targeted-fine-tuning-and-data-augmentation-2406.10177"/></url>
<url><loc>https://scifaro.com/en/abs/alignnet-learning-dataset-score-alignment-functions-to-enable-better-training-of-speech-quality-estimators-2406.10205</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignnet-learning-dataset-score-alignment-functions-to-enable-better-training-of-speech-quality-estimators-2406.10205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignnet-learning-dataset-score-alignment-functions-to-enable-better-training-of-speech-quality-estimators-2406.10205"/></url>
<url><loc>https://scifaro.com/en/abs/gender-representation-in-tv-and-radio-automatic-information-extraction-methods-versus-manual-analyses-2406.10316</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gender-representation-in-tv-and-radio-automatic-information-extraction-methods-versus-manual-analyses-2406.10316"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gender-representation-in-tv-and-radio-automatic-information-extraction-methods-versus-manual-analyses-2406.10316"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-speaker-identity-coding-in-self-supervised-models-and-humans-2406.10401</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-speaker-identity-coding-in-self-supervised-models-and-humans-2406.10401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-speaker-identity-coding-in-self-supervised-models-and-humans-2406.10401"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-discretized-saliency-maps-for-explainable-detection-of-ai-generated-voice-2406.10422</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-discretized-saliency-maps-for-explainable-detection-of-ai-generated-voice-2406.10422"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-discretized-saliency-maps-for-explainable-detection-of-ai-generated-voice-2406.10422"/></url>
<url><loc>https://scifaro.com/en/abs/avr-synergizing-foundation-models-for-audio-visual-humor-detection-2406.10448</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avr-synergizing-foundation-models-for-audio-visual-humor-detection-2406.10448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avr-synergizing-foundation-models-for-audio-visual-humor-detection-2406.10448"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-children-s-asr-with-supervised-and-self-supervised-speech-foundation-models-2406.10507</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-children-s-asr-with-supervised-and-self-supervised-speech-foundation-models-2406.10507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-children-s-asr-with-supervised-and-self-supervised-speech-foundation-models-2406.10507"/></url>
<url><loc>https://scifaro.com/en/abs/soa-reducing-domain-mismatch-in-ssl-pipeline-by-speech-only-adaptation-for-low-resource-asr-2406.10512</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soa-reducing-domain-mismatch-in-ssl-pipeline-by-speech-only-adaptation-for-low-resource-asr-2406.10512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soa-reducing-domain-mismatch-in-ssl-pipeline-by-speech-only-adaptation-for-low-resource-asr-2406.10512"/></url>
<url><loc>https://scifaro.com/en/abs/gtr-voice-articulatory-phonetics-informed-controllable-expressive-speech-synthesis-2406.10514</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gtr-voice-articulatory-phonetics-informed-controllable-expressive-speech-synthesis-2406.10514"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gtr-voice-articulatory-phonetics-informed-controllable-expressive-speech-synthesis-2406.10514"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-audio-segmentation-for-long-form-speech-translation-2406.10549</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-audio-segmentation-for-long-form-speech-translation-2406.10549"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-audio-segmentation-for-long-form-speech-translation-2406.10549"/></url>
<url><loc>https://scifaro.com/en/abs/mint-a-multi-modal-image-and-narrative-text-dubbing-dataset-for-foley-audio-content-planning-and-generation-2406.10591</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mint-a-multi-modal-image-and-narrative-text-dubbing-dataset-for-foley-audio-content-planning-and-generation-2406.10591"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mint-a-multi-modal-image-and-narrative-text-dubbing-dataset-for-foley-audio-content-planning-and-generation-2406.10591"/></url>
<url><loc>https://scifaro.com/en/abs/double-multi-head-attention-multimodal-system-for-odyssey-2024-speech-emotion-recognition-challenge-2406.10598</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/double-multi-head-attention-multimodal-system-for-odyssey-2024-speech-emotion-recognition-challenge-2406.10598"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/double-multi-head-attention-multimodal-system-for-odyssey-2024-speech-emotion-recognition-challenge-2406.10598"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-and-improving-scoring-fusion-for-spoofing-aware-speaker-verification-using-compositional-data-analysis-2406.10836</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-and-improving-scoring-fusion-for-spoofing-aware-speaker-verification-using-compositional-data-analysis-2406.10836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-and-improving-scoring-fusion-for-spoofing-aware-speaker-verification-using-compositional-data-analysis-2406.10836"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-accent-modeling-and-disentangling-for-multi-speaker-multi-accent-text-to-speech-synthesis-2406.10844</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-accent-modeling-and-disentangling-for-multi-speaker-multi-accent-text-to-speech-synthesis-2406.10844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-accent-modeling-and-disentangling-for-multi-speaker-multi-accent-text-to-speech-synthesis-2406.10844"/></url>
<url><loc>https://scifaro.com/en/abs/continual-test-time-adaptation-for-end-to-end-speech-recognition-on-noisy-speech-2406.11064</loc><lastmod>2024-10-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/continual-test-time-adaptation-for-end-to-end-speech-recognition-on-noisy-speech-2406.11064"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/continual-test-time-adaptation-for-end-to-end-speech-recognition-on-noisy-speech-2406.11064"/></url>
<url><loc>https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2406.11169</loc><lastmod>2024-12-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2406.11169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-distillation-prototypes-network-learning-robust-speaker-representations-without-supervision-2406.11169"/></url>
<url><loc>https://scifaro.com/en/abs/performance-improvement-of-language-queried-audio-source-separation-based-on-caption-augmentation-from-large-language-models-for-dcase-challenge-2024-task-9-2406.11248</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-improvement-of-language-queried-audio-source-separation-based-on-caption-augmentation-from-large-language-models-for-dcase-challenge-2024-task-9-2406.11248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-improvement-of-language-queried-audio-source-separation-based-on-caption-augmentation-from-large-language-models-for-dcase-challenge-2024-task-9-2406.11248"/></url>
<url><loc>https://scifaro.com/en/abs/spatially-constrained-vs-unconstrained-filtering-in-neural-spatiospectral-filters-for-multichannel-speech-enhancement-2406.11376</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatially-constrained-vs-unconstrained-filtering-in-neural-spatiospectral-filters-for-multichannel-speech-enhancement-2406.11376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatially-constrained-vs-unconstrained-filtering-in-neural-spatiospectral-filters-for-multichannel-speech-enhancement-2406.11376"/></url>
<url><loc>https://scifaro.com/en/abs/an-exploration-of-length-generalization-in-transformer-based-speech-enhancement-2406.11401</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exploration-of-length-generalization-in-transformer-based-speech-enhancement-2406.11401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exploration-of-length-generalization-in-transformer-based-speech-enhancement-2406.11401"/></url>
<url><loc>https://scifaro.com/en/abs/ditto-tts-diffusion-transformers-for-scalable-text-to-speech-without-domain-specific-factors-2406.11427</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditto-tts-diffusion-transformers-for-scalable-text-to-speech-without-domain-specific-factors-2406.11427"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditto-tts-diffusion-transformers-for-scalable-text-to-speech-without-domain-specific-factors-2406.11427"/></url>
<url><loc>https://scifaro.com/en/abs/gigaspeech-2-an-evolving-large-scale-and-multi-domain-asr-corpus-for-low-resource-languages-with-automated-crawling-transcription-and-refinement-2406.11546</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gigaspeech-2-an-evolving-large-scale-and-multi-domain-asr-corpus-for-low-resource-languages-with-automated-crawling-transcription-and-refinement-2406.11546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gigaspeech-2-an-evolving-large-scale-and-multi-domain-asr-corpus-for-low-resource-languages-with-automated-crawling-transcription-and-refinement-2406.11546"/></url>
<url><loc>https://scifaro.com/en/abs/av-crossnet-an-audiovisual-complex-spectral-mapping-network-for-speech-separation-by-leveraging-narrow-and-cross-band-modeling-2406.11619</loc><lastmod>2024-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/av-crossnet-an-audiovisual-complex-spectral-mapping-network-for-speech-separation-by-leveraging-narrow-and-cross-band-modeling-2406.11619"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/av-crossnet-an-audiovisual-complex-spectral-mapping-network-for-speech-separation-by-leveraging-narrow-and-cross-band-modeling-2406.11619"/></url>
<url><loc>https://scifaro.com/en/abs/1000-african-voices-advancing-inclusive-multi-speaker-multi-accent-speech-synthesis-2406.11727</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/1000-african-voices-advancing-inclusive-multi-speaker-multi-accent-speech-synthesis-2406.11727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/1000-african-voices-advancing-inclusive-multi-speaker-multi-accent-speech-synthesis-2406.11727"/></url>
<url><loc>https://scifaro.com/en/abs/universal-score-based-speech-enhancement-with-high-content-preservation-2406.12194</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-score-based-speech-enhancement-with-high-content-preservation-2406.12194"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-score-based-speech-enhancement-with-high-content-preservation-2406.12194"/></url>
<url><loc>https://scifaro.com/en/abs/binaural-selective-attention-model-for-target-speaker-extraction-2406.12236</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaural-selective-attention-model-for-target-speaker-extraction-2406.12236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaural-selective-attention-model-for-target-speaker-extraction-2406.12236"/></url>
<url><loc>https://scifaro.com/en/abs/performant-asr-models-for-medical-entities-in-accented-speech-2406.12387</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performant-asr-models-for-medical-entities-in-accented-speech-2406.12387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performant-asr-models-for-medical-entities-in-accented-speech-2406.12387"/></url>
<url><loc>https://scifaro.com/en/abs/text-aware-speech-separation-for-multi-talker-keyword-spotting-2406.12447</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-aware-speech-separation-for-multi-talker-keyword-spotting-2406.12447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-aware-speech-separation-for-multi-talker-keyword-spotting-2406.12447"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-online-continual-learning-for-automatic-speech-recognition-2406.12503</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-online-continual-learning-for-automatic-speech-recognition-2406.12503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-online-continual-learning-for-automatic-speech-recognition-2406.12503"/></url>
<url><loc>https://scifaro.com/en/abs/challenging-margin-based-speaker-embedding-extractors-by-using-the-variational-information-bottleneck-2406.12622</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/challenging-margin-based-speaker-embedding-extractors-by-using-the-variational-information-bottleneck-2406.12622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/challenging-margin-based-speaker-embedding-extractors-by-using-the-variational-information-bottleneck-2406.12622"/></url>
<url><loc>https://scifaro.com/en/abs/transcribe-align-and-segment-creating-speech-datasets-for-low-resource-languages-2406.12674</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcribe-align-and-segment-creating-speech-datasets-for-low-resource-languages-2406.12674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcribe-align-and-segment-creating-speech-datasets-for-low-resource-languages-2406.12674"/></url>
<url><loc>https://scifaro.com/en/abs/speak-in-the-scene-diffusion-based-acoustic-scene-transfer-toward-immersive-speech-generation-2406.12688</loc><lastmod>2024-06-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speak-in-the-scene-diffusion-based-acoustic-scene-transfer-toward-immersive-speech-generation-2406.12688"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speak-in-the-scene-diffusion-based-acoustic-scene-transfer-toward-immersive-speech-generation-2406.12688"/></url>
<url><loc>https://scifaro.com/en/abs/sound-event-detection-based-on-auxiliary-decoder-and-maximum-probability-aggregation-for-dcase-challenge-2024-task-4-2406.12721</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-event-detection-based-on-auxiliary-decoder-and-maximum-probability-aggregation-for-dcase-challenge-2024-task-4-2406.12721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-event-detection-based-on-auxiliary-decoder-and-maximum-probability-aggregation-for-dcase-challenge-2024-task-4-2406.12721"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-speech-recognition-for-biomedical-data-in-bengali-language-2406.12931</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-speech-recognition-for-biomedical-data-in-bengali-language-2406.12931"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-speech-recognition-for-biomedical-data-in-bengali-language-2406.12931"/></url>
<url><loc>https://scifaro.com/en/abs/self-train-before-you-transcribe-2406.12937</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-train-before-you-transcribe-2406.12937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-train-before-you-transcribe-2406.12937"/></url>
<url><loc>https://scifaro.com/en/abs/instruction-data-generation-and-unsupervised-adaptation-for-speech-language-models-2406.12946</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instruction-data-generation-and-unsupervised-adaptation-for-speech-language-models-2406.12946"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instruction-data-generation-and-unsupervised-adaptation-for-speech-language-models-2406.12946"/></url>
<url><loc>https://scifaro.com/en/abs/coding-speech-through-vocal-tract-kinematics-2406.12998</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/coding-speech-through-vocal-tract-kinematics-2406.12998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/coding-speech-through-vocal-tract-kinematics-2406.12998"/></url>
<url><loc>https://scifaro.com/en/abs/audio-fingerprinting-with-holographic-reduced-representations-2406.13139</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-fingerprinting-with-holographic-reduced-representations-2406.13139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-fingerprinting-with-holographic-reduced-representations-2406.13139"/></url>
<url><loc>https://scifaro.com/en/abs/cec-a-noisy-label-detection-method-for-speaker-recognition-2406.13268</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cec-a-noisy-label-detection-method-for-speaker-recognition-2406.13268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cec-a-noisy-label-detection-method-for-speaker-recognition-2406.13268"/></url>
<url><loc>https://scifaro.com/en/abs/pushing-the-limit-of-sound-event-detection-with-multi-dilated-frequency-dynamic-convolution-2406.13312</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pushing-the-limit-of-sound-event-detection-with-multi-dilated-frequency-dynamic-convolution-2406.13312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pushing-the-limit-of-sound-event-detection-with-multi-dilated-frequency-dynamic-convolution-2406.13312"/></url>
<url><loc>https://scifaro.com/en/abs/medical-spoken-named-entity-recognition-2406.13337</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/medical-spoken-named-entity-recognition-2406.13337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/medical-spoken-named-entity-recognition-2406.13337"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-by-design-audio-segmentation-through-non-negative-matrix-factorization-and-probing-2406.13385</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-by-design-audio-segmentation-through-non-negative-matrix-factorization-and-probing-2406.13385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-by-design-audio-segmentation-through-non-negative-matrix-factorization-and-probing-2406.13385"/></url>
<url><loc>https://scifaro.com/en/abs/online-domain-incremental-learning-approach-to-classify-acoustic-scenes-in-all-locations-2406.13386</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/online-domain-incremental-learning-approach-to-classify-acoustic-scenes-in-all-locations-2406.13386"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/online-domain-incremental-learning-approach-to-classify-acoustic-scenes-in-all-locations-2406.13386"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-generative-modeling-with-discriminative-guidance-for-streamable-speech-enhancement-2406.13471</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-generative-modeling-with-discriminative-guidance-for-streamable-speech-enhancement-2406.13471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-generative-modeling-with-discriminative-guidance-for-streamable-speech-enhancement-2406.13471"/></url>
<url><loc>https://scifaro.com/en/abs/conmod-controllable-neural-frame-based-modulation-effects-2406.13935</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conmod-controllable-neural-frame-based-modulation-effects-2406.13935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conmod-controllable-neural-frame-based-modulation-effects-2406.13935"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-vocal-articulations-from-acoustic-latent-representations-2406.14379</loc><lastmod>2024-06-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-vocal-articulations-from-acoustic-latent-representations-2406.14379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-vocal-articulations-from-acoustic-latent-representations-2406.14379"/></url>
<url><loc>https://scifaro.com/en/abs/towards-intelligent-speech-assistants-in-operating-rooms-a-multimodal-model-for-surgical-workflow-analysis-2406.14576</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-intelligent-speech-assistants-in-operating-rooms-a-multimodal-model-for-surgical-workflow-analysis-2406.14576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-intelligent-speech-assistants-in-operating-rooms-a-multimodal-model-for-surgical-workflow-analysis-2406.14576"/></url>
<url><loc>https://scifaro.com/en/abs/voice-disorder-analysis-a-transformer-based-approach-2406.14693</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voice-disorder-analysis-a-transformer-based-approach-2406.14693"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voice-disorder-analysis-a-transformer-based-approach-2406.14693"/></url>
<url><loc>https://scifaro.com/en/abs/dexter-learning-and-controlling-performance-expression-with-diffusion-models-2406.14850</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dexter-learning-and-controlling-performance-expression-with-diffusion-models-2406.14850"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dexter-learning-and-controlling-performance-expression-with-diffusion-models-2406.14850"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-audio-visual-information-fusion-for-sound-event-localization-and-detection-in-low-resource-realistic-scenarios-2406.15160</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-audio-visual-information-fusion-for-sound-event-localization-and-detection-in-low-resource-realistic-scenarios-2406.15160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-audio-visual-information-fusion-for-sound-event-localization-and-detection-in-low-resource-realistic-scenarios-2406.15160"/></url>
<url><loc>https://scifaro.com/en/abs/prompting-whisper-for-qa-driven-zero-shot-end-to-end-spoken-language-understanding-2406.15209</loc><lastmod>2024-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prompting-whisper-for-qa-driven-zero-shot-end-to-end-spoken-language-understanding-2406.15209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prompting-whisper-for-qa-driven-zero-shot-end-to-end-spoken-language-understanding-2406.15209"/></url>
<url><loc>https://scifaro.com/en/abs/self-training-and-ensembling-frequency-dependent-networks-with-coarse-prediction-pooling-and-sound-event-bounding-boxes-2406.15725</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-training-and-ensembling-frequency-dependent-networks-with-coarse-prediction-pooling-and-sound-event-bounding-boxes-2406.15725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-training-and-ensembling-frequency-dependent-networks-with-coarse-prediction-pooling-and-sound-event-bounding-boxes-2406.15725"/></url>
<url><loc>https://scifaro.com/en/abs/tacolm-gated-attention-equipped-codec-language-model-are-efficient-zero-shot-text-to-speech-synthesizers-2406.15752</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tacolm-gated-attention-equipped-codec-language-model-are-efficient-zero-shot-text-to-speech-synthesizers-2406.15752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tacolm-gated-attention-equipped-codec-language-model-are-efficient-zero-shot-text-to-speech-synthesizers-2406.15752"/></url>
<url><loc>https://scifaro.com/en/abs/fusing-audio-and-metadata-embeddings-improves-language-based-audio-retrieval-2406.15897</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fusing-audio-and-metadata-embeddings-improves-language-based-audio-retrieval-2406.15897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fusing-audio-and-metadata-embeddings-improves-language-based-audio-retrieval-2406.15897"/></url>
<url><loc>https://scifaro.com/en/abs/text-queried-target-sound-event-localization-2406.16058</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-queried-target-sound-event-localization-2406.16058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-queried-target-sound-event-localization-2406.16058"/></url>
<url><loc>https://scifaro.com/en/abs/decoder-only-architecture-for-streaming-end-to-end-speech-recognition-2406.16107</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoder-only-architecture-for-streaming-end-to-end-speech-recognition-2406.16107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoder-only-architecture-for-streaming-end-to-end-speech-recognition-2406.16107"/></url>
<url><loc>https://scifaro.com/en/abs/contextualized-end-to-end-automatic-speech-recognition-with-intermediate-biasing-loss-2406.16120</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextualized-end-to-end-automatic-speech-recognition-with-intermediate-biasing-loss-2406.16120"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextualized-end-to-end-automatic-speech-recognition-with-intermediate-biasing-loss-2406.16120"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-foundation-models-and-speech-enhancement-for-parkinson-s-disease-detection-from-speech-in-real-world-operative-conditions-2406.16128</loc><lastmod>2025-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-foundation-models-and-speech-enhancement-for-parkinson-s-disease-detection-from-speech-in-real-world-operative-conditions-2406.16128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-foundation-models-and-speech-enhancement-for-parkinson-s-disease-detection-from-speech-in-real-world-operative-conditions-2406.16128"/></url>
<url><loc>https://scifaro.com/en/abs/dreamvoice-text-guided-voice-conversion-2406.16314</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dreamvoice-text-guided-voice-conversion-2406.16314"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dreamvoice-text-guided-voice-conversion-2406.16314"/></url>
<url><loc>https://scifaro.com/en/abs/song-data-cleansing-for-end-to-end-neural-singer-diarization-using-neural-analysis-and-synthesis-framework-2406.16315</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/song-data-cleansing-for-end-to-end-neural-singer-diarization-using-neural-analysis-and-synthesis-framework-2406.16315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/song-data-cleansing-for-end-to-end-neural-singer-diarization-using-neural-analysis-and-synthesis-framework-2406.16315"/></url>
<url><loc>https://scifaro.com/en/abs/refxvc-cross-lingual-voice-conversion-with-enhanced-reference-leveraging-2406.16326</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/refxvc-cross-lingual-voice-conversion-with-enhanced-reference-leveraging-2406.16326"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/refxvc-cross-lingual-voice-conversion-with-enhanced-reference-leveraging-2406.16326"/></url>
<url><loc>https://scifaro.com/en/abs/one-class-learning-with-adaptive-centroid-shift-for-audio-deepfake-detection-2406.16716</loc><lastmod>2024-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-class-learning-with-adaptive-centroid-shift-for-audio-deepfake-detection-2406.16716"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-class-learning-with-adaptive-centroid-shift-for-audio-deepfake-detection-2406.16716"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-compressibility-of-transformer-based-text-to-music-ttm-models-2406.17159</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-compressibility-of-transformer-based-text-to-music-ttm-models-2406.17159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-compressibility-of-transformer-based-text-to-music-ttm-models-2406.17159"/></url>
<url><loc>https://scifaro.com/en/abs/ag-lsec-audio-grounded-lexical-speaker-error-correction-2406.17266</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ag-lsec-audio-grounded-lexical-speaker-error-correction-2406.17266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ag-lsec-audio-grounded-lexical-speaker-error-correction-2406.17266"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-text-to-speech-via-discrete-tokens-using-token-transducer-and-group-masked-language-model-2406.17310</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-text-to-speech-via-discrete-tokens-using-token-transducer-and-group-masked-language-model-2406.17310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-text-to-speech-via-discrete-tokens-using-token-transducer-and-group-masked-language-model-2406.17310"/></url>
<url><loc>https://scifaro.com/en/abs/towards-building-an-end-to-end-multilingual-automatic-lyrics-transcription-model-2406.17618</loc><lastmod>2024-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-building-an-end-to-end-multilingual-automatic-lyrics-transcription-model-2406.17618"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-building-an-end-to-end-multilingual-automatic-lyrics-transcription-model-2406.17618"/></url>
<url><loc>https://scifaro.com/en/abs/e2-tts-embarrassingly-easy-fully-non-autoregressive-zero-shot-tts-2406.18009</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e2-tts-embarrassingly-easy-fully-non-autoregressive-zero-shot-tts-2406.18009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e2-tts-embarrassingly-easy-fully-non-autoregressive-zero-shot-tts-2406.18009"/></url>
<url><loc>https://scifaro.com/en/abs/on-calibration-of-speech-classification-models-insights-from-energy-based-model-investigations-2406.18065</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-calibration-of-speech-classification-models-insights-from-energy-based-model-investigations-2406.18065"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-calibration-of-speech-classification-models-insights-from-energy-based-model-investigations-2406.18065"/></url>
<url><loc>https://scifaro.com/en/abs/msr-86k-an-evolving-multilingual-corpus-with-86-300-hours-of-transcribed-audio-for-speech-recognition-research-2406.18301</loc><lastmod>2024-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/msr-86k-an-evolving-multilingual-corpus-with-86-300-hours-of-transcribed-audio-for-speech-recognition-research-2406.18301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/msr-86k-an-evolving-multilingual-corpus-with-86-300-hours-of-transcribed-audio-for-speech-recognition-research-2406.18301"/></url>
<url><loc>https://scifaro.com/en/abs/speakers-unembedded-embedding-free-approach-to-long-form-neural-diarization-2406.18679</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speakers-unembedded-embedding-free-approach-to-long-form-neural-diarization-2406.18679"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speakers-unembedded-embedding-free-approach-to-long-form-neural-diarization-2406.18679"/></url>
<url><loc>https://scifaro.com/en/abs/wavrx-a-disease-agnostic-generalizable-and-privacy-preserving-speech-health-diagnostic-model-2406.18731</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavrx-a-disease-agnostic-generalizable-and-privacy-preserving-speech-health-diagnostic-model-2406.18731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavrx-a-disease-agnostic-generalizable-and-privacy-preserving-speech-health-diagnostic-model-2406.18731"/></url>
<url><loc>https://scifaro.com/en/abs/desta-enhancing-speech-language-models-through-descriptive-speech-text-alignment-2406.18871</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/desta-enhancing-speech-language-models-through-descriptive-speech-text-alignment-2406.18871"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/desta-enhancing-speech-language-models-through-descriptive-speech-text-alignment-2406.18871"/></url>
<url><loc>https://scifaro.com/en/abs/applying-llms-for-rescoring-n-best-asr-hypotheses-of-casual-conversations-effects-of-domain-adaptation-and-context-carry-over-2406.18972</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/applying-llms-for-rescoring-n-best-asr-hypotheses-of-casual-conversations-effects-of-domain-adaptation-and-context-carry-over-2406.18972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/applying-llms-for-rescoring-n-best-asr-hypotheses-of-casual-conversations-effects-of-domain-adaptation-and-context-carry-over-2406.18972"/></url>
<url><loc>https://scifaro.com/en/abs/dex-tts-diffusion-based-expressive-text-to-speech-with-style-modeling-on-time-variability-2406.19135</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dex-tts-diffusion-based-expressive-text-to-speech-with-style-modeling-on-time-variability-2406.19135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dex-tts-diffusion-based-expressive-text-to-speech-with-style-modeling-on-time-variability-2406.19135"/></url>
<url><loc>https://scifaro.com/en/abs/tradition-or-innovation-a-comparison-of-modern-asr-methods-for-forced-alignment-2406.19363</loc><lastmod>2024-06-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tradition-or-innovation-a-comparison-of-modern-asr-methods-for-forced-alignment-2406.19363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tradition-or-innovation-a-comparison-of-modern-asr-methods-for-forced-alignment-2406.19363"/></url>
<url><loc>https://scifaro.com/en/abs/fmsg-jless-submission-for-dcase-2024-task4-on-sound-event-detection-with-heterogeneous-training-dataset-and-potentially-missing-labels-2407.00291</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fmsg-jless-submission-for-dcase-2024-task4-on-sound-event-detection-with-heterogeneous-training-dataset-and-potentially-missing-labels-2407.00291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fmsg-jless-submission-for-dcase-2024-task4-on-sound-event-detection-with-heterogeneous-training-dataset-and-potentially-missing-labels-2407.00291"/></url>
<url><loc>https://scifaro.com/en/abs/fly-tts-fast-lightweight-and-high-quality-end-to-end-text-to-speech-synthesis-2407.00753</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fly-tts-fast-lightweight-and-high-quality-end-to-end-text-to-speech-synthesis-2407.00753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fly-tts-fast-lightweight-and-high-quality-end-to-end-text-to-speech-synthesis-2407.00753"/></url>
<url><loc>https://scifaro.com/en/abs/less-forgetting-for-better-generalization-exploring-continual-learning-fine-tuning-methods-for-speech-self-supervised-representations-2407.00756</loc><lastmod>2024-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/less-forgetting-for-better-generalization-exploring-continual-learning-fine-tuning-methods-for-speech-self-supervised-representations-2407.00756"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/less-forgetting-for-better-generalization-exploring-continual-learning-fine-tuning-methods-for-speech-self-supervised-representations-2407.00756"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-approach-for-multimodal-concurrent-speaker-detection-2407.01774</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-approach-for-multimodal-concurrent-speaker-detection-2407.01774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-approach-for-multimodal-concurrent-speaker-detection-2407.01774"/></url>
<url><loc>https://scifaro.com/en/abs/peerrtf-robust-mvdr-beamforming-using-graph-convolutional-network-2407.01779</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/peerrtf-robust-mvdr-beamforming-using-graph-convolutional-network-2407.01779"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/peerrtf-robust-mvdr-beamforming-using-graph-convolutional-network-2407.01779"/></url>
<url><loc>https://scifaro.com/en/abs/speakerbeam-ss-real-time-target-speaker-extraction-with-lightweight-conv-tasnet-and-state-space-modeling-2407.01857</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speakerbeam-ss-real-time-target-speaker-extraction-with-lightweight-conv-tasnet-and-state-space-modeling-2407.01857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speakerbeam-ss-real-time-target-speaker-extraction-with-lightweight-conv-tasnet-and-state-space-modeling-2407.01857"/></url>
<url><loc>https://scifaro.com/en/abs/ttslow-slow-down-text-to-speech-with-efficiency-robustness-evaluations-2407.01927</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ttslow-slow-down-text-to-speech-with-efficiency-robustness-evaluations-2407.01927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ttslow-slow-down-text-to-speech-with-efficiency-robustness-evaluations-2407.01927"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-face-masked-speech-enhancement-using-generative-adversarial-networks-with-human-in-the-loop-assessment-metrics-2407.01939</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-face-masked-speech-enhancement-using-generative-adversarial-networks-with-human-in-the-loop-assessment-metrics-2407.01939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-face-masked-speech-enhancement-using-generative-adversarial-networks-with-human-in-the-loop-assessment-metrics-2407.01939"/></url>
<url><loc>https://scifaro.com/en/abs/towards-unsupervised-speaker-diarization-system-for-multilingual-telephone-calls-using-pre-trained-whisper-model-and-mixture-of-sparse-autoencoders-2407.01963</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-unsupervised-speaker-diarization-system-for-multilingual-telephone-calls-using-pre-trained-whisper-model-and-mixture-of-sparse-autoencoders-2407.01963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-unsupervised-speaker-diarization-system-for-multilingual-telephone-calls-using-pre-trained-whisper-model-and-mixture-of-sparse-autoencoders-2407.01963"/></url>
<url><loc>https://scifaro.com/en/abs/sot-triggered-neural-clustering-for-speaker-attributed-asr-2407.02007</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sot-triggered-neural-clustering-for-speaker-attributed-asr-2407.02007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sot-triggered-neural-clustering-for-speaker-attributed-asr-2407.02007"/></url>
<url><loc>https://scifaro.com/en/abs/accompanied-singing-voice-synthesis-with-fully-text-controlled-melody-2407.02049</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accompanied-singing-voice-synthesis-with-fully-text-controlled-melody-2407.02049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accompanied-singing-voice-synthesis-with-fully-text-controlled-melody-2407.02049"/></url>
<url><loc>https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-icmc-asr-challenge-2407.02052</loc><lastmod>2024-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-icmc-asr-challenge-2407.02052"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-icmc-asr-challenge-2407.02052"/></url>
<url><loc>https://scifaro.com/en/abs/zero-bit-transmission-of-adaptive-pre-and-de-emphasis-filters-for-speech-and-audio-coding-2407.02672</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-bit-transmission-of-adaptive-pre-and-de-emphasis-filters-for-speech-and-audio-coding-2407.02672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-bit-transmission-of-adaptive-pre-and-de-emphasis-filters-for-speech-and-audio-coding-2407.02672"/></url>
<url><loc>https://scifaro.com/en/abs/vae-based-phoneme-alignment-using-gradient-annealing-and-ssl-acoustic-features-2407.02749</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vae-based-phoneme-alignment-using-gradient-annealing-and-ssl-acoustic-features-2407.02749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vae-based-phoneme-alignment-using-gradient-annealing-and-ssl-acoustic-features-2407.02749"/></url>
<url><loc>https://scifaro.com/en/abs/sa-wavlm-speaker-aware-self-supervised-pre-training-for-mixture-speech-2407.02826</loc><lastmod>2024-07-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sa-wavlm-speaker-aware-self-supervised-pre-training-for-mixture-speech-2407.02826"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sa-wavlm-speaker-aware-self-supervised-pre-training-for-mixture-speech-2407.02826"/></url>
<url><loc>https://scifaro.com/en/abs/codec-asr-training-performant-automatic-speech-recognition-systems-with-discrete-speech-representations-2407.03495</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codec-asr-training-performant-automatic-speech-recognition-systems-with-discrete-speech-representations-2407.03495"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codec-asr-training-performant-automatic-speech-recognition-systems-with-discrete-speech-representations-2407.03495"/></url>
<url><loc>https://scifaro.com/en/abs/learning-video-temporal-dynamics-with-cross-modal-attention-for-robust-audio-visual-speech-recognition-2407.03563</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-video-temporal-dynamics-with-cross-modal-attention-for-robust-audio-visual-speech-recognition-2407.03563"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-video-temporal-dynamics-with-cross-modal-attention-for-robust-audio-visual-speech-recognition-2407.03563"/></url>
<url><loc>https://scifaro.com/en/abs/high-fidelity-text-guided-music-editing-via-single-stage-flow-matching-2407.03648</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-fidelity-text-guided-music-editing-via-single-stage-flow-matching-2407.03648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-fidelity-text-guided-music-editing-via-single-stage-flow-matching-2407.03648"/></url>
<url><loc>https://scifaro.com/en/abs/dg-sed-domain-generalization-for-sound-event-detection-with-heterogeneous-training-data-2407.03654</loc><lastmod>2025-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dg-sed-domain-generalization-for-sound-event-detection-with-heterogeneous-training-data-2407.03654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dg-sed-domain-generalization-for-sound-event-detection-with-heterogeneous-training-data-2407.03654"/></url>
<url><loc>https://scifaro.com/en/abs/wilddesed-an-llm-powered-dataset-for-wild-domestic-environment-sound-event-detection-system-2407.03656</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wilddesed-an-llm-powered-dataset-for-wild-domestic-environment-sound-event-detection-system-2407.03656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wilddesed-an-llm-powered-dataset-for-wild-domestic-environment-sound-event-detection-system-2407.03656"/></url>
<url><loc>https://scifaro.com/en/abs/ucil-an-unsupervised-class-incremental-learning-approach-for-sound-event-detection-2407.03657</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ucil-an-unsupervised-class-incremental-learning-approach-for-sound-event-detection-2407.03657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ucil-an-unsupervised-class-incremental-learning-approach-for-sound-event-detection-2407.03657"/></url>
<url><loc>https://scifaro.com/en/abs/where-s-that-voice-coming-continual-learning-for-sound-source-localization-2407.03661</loc><lastmod>2025-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/where-s-that-voice-coming-continual-learning-for-sound-source-localization-2407.03661"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/where-s-that-voice-coming-continual-learning-for-sound-source-localization-2407.03661"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-a-dcf-for-spoofing-robust-speaker-verification-2407.04034</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-a-dcf-for-spoofing-robust-speaker-verification-2407.04034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-a-dcf-for-spoofing-robust-speaker-verification-2407.04034"/></url>
<url><loc>https://scifaro.com/en/abs/dass-distilled-audio-state-space-models-are-stronger-and-more-duration-scalable-learners-2407.04082</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dass-distilled-audio-state-space-models-are-stronger-and-more-duration-scalable-learners-2407.04082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dass-distilled-audio-state-space-models-are-stronger-and-more-duration-scalable-learners-2407.04082"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-learning-for-code-switching-asr-with-large-language-model-filter-2407.04219</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-learning-for-code-switching-asr-with-large-language-model-filter-2407.04219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-learning-for-code-switching-asr-with-large-language-model-filter-2407.04219"/></url>
<url><loc>https://scifaro.com/en/abs/who-finds-this-voice-attractive-a-large-scale-experiment-using-in-the-wild-data-2407.04270</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-finds-this-voice-attractive-a-large-scale-experiment-using-in-the-wild-data-2407.04270"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-finds-this-voice-attractive-a-large-scale-experiment-using-in-the-wild-data-2407.04270"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-speaker-embeddings-for-speech-generation-sub-center-modeling-for-capturing-intra-speaker-diversity-2407.04291</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-speaker-embeddings-for-speech-generation-sub-center-modeling-for-capturing-intra-speaker-diversity-2407.04291"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-speaker-embeddings-for-speech-generation-sub-center-modeling-for-capturing-intra-speaker-diversity-2407.04291"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-estimation-using-deep-kernel-learning-regularized-by-the-wave-equation-2407.04417</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-estimation-using-deep-kernel-learning-regularized-by-the-wave-equation-2407.04417"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-estimation-using-deep-kernel-learning-regularized-by-the-wave-equation-2407.04417"/></url>
<url><loc>https://scifaro.com/en/abs/xlsr-transducer-streaming-asr-for-self-supervised-pretrained-models-2407.04439</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xlsr-transducer-streaming-asr-for-self-supervised-pretrained-models-2407.04439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xlsr-transducer-streaming-asr-for-self-supervised-pretrained-models-2407.04439"/></url>
<url><loc>https://scifaro.com/en/abs/from-audio-encoders-to-piano-judges-benchmarking-performance-understanding-for-solo-piano-2407.04518</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-audio-encoders-to-piano-judges-benchmarking-performance-understanding-for-solo-piano-2407.04518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-audio-encoders-to-piano-judges-benchmarking-performance-understanding-for-solo-piano-2407.04518"/></url>
<url><loc>https://scifaro.com/en/abs/fa-gan-artifacts-free-and-phase-aware-high-fidelity-gan-based-vocoder-2407.04575</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fa-gan-artifacts-free-and-phase-aware-high-fidelity-gan-based-vocoder-2407.04575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fa-gan-artifacts-free-and-phase-aware-high-fidelity-gan-based-vocoder-2407.04575"/></url>
<url><loc>https://scifaro.com/en/abs/written-term-detection-improves-spoken-term-detection-2407.04601</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/written-term-detection-improves-spoken-term-detection-2407.04601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/written-term-detection-improves-spoken-term-detection-2407.04601"/></url>
<url><loc>https://scifaro.com/en/abs/speculative-speech-recognition-by-audio-prefixed-low-rank-adaptation-of-language-models-2407.04641</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speculative-speech-recognition-by-audio-prefixed-low-rank-adaptation-of-language-models-2407.04641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speculative-speech-recognition-by-audio-prefixed-low-rank-adaptation-of-language-models-2407.04641"/></url>
<url><loc>https://scifaro.com/en/abs/pretraining-end-to-end-keyword-search-with-automatically-discovered-acoustic-units-2407.04652</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pretraining-end-to-end-keyword-search-with-automatically-discovered-acoustic-units-2407.04652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pretraining-end-to-end-keyword-search-with-automatically-discovered-acoustic-units-2407.04652"/></url>
<url><loc>https://scifaro.com/en/abs/multitaper-mel-spectrograms-for-keyword-spotting-2407.04662</loc><lastmod>2024-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multitaper-mel-spectrograms-for-keyword-spotting-2407.04662"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multitaper-mel-spectrograms-for-keyword-spotting-2407.04662"/></url>
<url><loc>https://scifaro.com/en/abs/seed-asr-understanding-diverse-speech-and-contexts-with-llm-based-speech-recognition-2407.04675</loc><lastmod>2024-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seed-asr-understanding-diverse-speech-and-contexts-with-llm-based-speech-recognition-2407.04675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seed-asr-understanding-diverse-speech-and-contexts-with-llm-based-speech-recognition-2407.04675"/></url>
<url><loc>https://scifaro.com/en/abs/yourmt3-multi-instrument-music-transcription-with-enhanced-transformer-architectures-and-cross-dataset-stem-augmentation-2407.04822</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yourmt3-multi-instrument-music-transcription-with-enhanced-transformer-architectures-and-cross-dataset-stem-augmentation-2407.04822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yourmt3-multi-instrument-music-transcription-with-enhanced-transformer-architectures-and-cross-dataset-stem-augmentation-2407.04822"/></url>
<url><loc>https://scifaro.com/en/abs/emilia-an-extensive-multilingual-and-diverse-speech-dataset-for-large-scale-speech-generation-2407.05361</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emilia-an-extensive-multilingual-and-diverse-speech-dataset-for-large-scale-speech-generation-2407.05361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emilia-an-extensive-multilingual-and-diverse-speech-dataset-for-large-scale-speech-generation-2407.05361"/></url>
<url><loc>https://scifaro.com/en/abs/asrrl-tts-agile-speaker-representation-reinforcement-learning-for-text-to-speech-speaker-adaptation-2407.05421</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asrrl-tts-agile-speaker-representation-reinforcement-learning-for-text-to-speech-speaker-adaptation-2407.05421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asrrl-tts-agile-speaker-representation-reinforcement-learning-for-text-to-speech-speaker-adaptation-2407.05421"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-and-interpretable-neural-speech-editing-2407.05471</loc><lastmod>2024-07-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-and-interpretable-neural-speech-editing-2407.05471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-and-interpretable-neural-speech-editing-2407.05471"/></url>
<url><loc>https://scifaro.com/en/abs/differentiable-modal-synthesis-for-physical-modeling-of-planar-string-sound-and-motion-simulation-2407.05516</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/differentiable-modal-synthesis-for-physical-modeling-of-planar-string-sound-and-motion-simulation-2407.05516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/differentiable-modal-synthesis-for-physical-modeling-of-planar-string-sound-and-motion-simulation-2407.05516"/></url>
<url><loc>https://scifaro.com/en/abs/automating-urban-soundscape-enhancements-with-ai-in-situ-assessment-of-quality-and-restorativeness-in-traffic-exposed-residential-areas-2407.05744</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automating-urban-soundscape-enhancements-with-ai-in-situ-assessment-of-quality-and-restorativeness-in-traffic-exposed-residential-areas-2407.05744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automating-urban-soundscape-enhancements-with-ai-in-situ-assessment-of-quality-and-restorativeness-in-traffic-exposed-residential-areas-2407.05744"/></url>
<url><loc>https://scifaro.com/en/abs/xane-background-acoustic-embeddings-ablation-and-clustering-analysis-2407.06342</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xane-background-acoustic-embeddings-ablation-and-clustering-analysis-2407.06342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xane-background-acoustic-embeddings-ablation-and-clustering-analysis-2407.06342"/></url>
<url><loc>https://scifaro.com/en/abs/learn-and-don-t-forget-adding-a-new-language-to-asr-foundation-models-2407.06800</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn-and-don-t-forget-adding-a-new-language-to-asr-foundation-models-2407.06800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn-and-don-t-forget-adding-a-new-language-to-asr-foundation-models-2407.06800"/></url>
<url><loc>https://scifaro.com/en/abs/gaunt-coefficients-for-complex-and-real-spherical-harmonics-with-applications-to-spherical-array-processing-and-ambisonics-2407.06847</loc><lastmod>2024-07-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gaunt-coefficients-for-complex-and-real-spherical-harmonics-with-applications-to-spherical-array-processing-and-ambisonics-2407.06847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gaunt-coefficients-for-complex-and-real-spherical-harmonics-with-applications-to-spherical-array-processing-and-ambisonics-2407.06847"/></url>
<url><loc>https://scifaro.com/en/abs/listen-and-speak-fairly-a-study-on-semantic-gender-bias-in-speech-integrated-large-language-models-2407.06957</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-and-speak-fairly-a-study-on-semantic-gender-bias-in-speech-integrated-large-language-models-2407.06957"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-and-speak-fairly-a-study-on-semantic-gender-bias-in-speech-integrated-large-language-models-2407.06957"/></url>
<url><loc>https://scifaro.com/en/abs/remastering-divide-and-remaster-a-cinematic-audio-source-separation-dataset-with-multilingual-support-2407.07275</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/remastering-divide-and-remaster-a-cinematic-audio-source-separation-dataset-with-multilingual-support-2407.07275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/remastering-divide-and-remaster-a-cinematic-audio-source-separation-dataset-with-multilingual-support-2407.07275"/></url>
<url><loc>https://scifaro.com/en/abs/avcap-leveraging-audio-visual-features-as-text-tokens-for-captioning-2407.07801</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avcap-leveraging-audio-visual-features-as-text-tokens-for-captioning-2407.07801"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avcap-leveraging-audio-visual-features-as-text-tokens-for-captioning-2407.07801"/></url>
<url><loc>https://scifaro.com/en/abs/source-tracing-of-audio-deepfake-systems-2407.08016</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/source-tracing-of-audio-deepfake-systems-2407.08016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/source-tracing-of-audio-deepfake-systems-2407.08016"/></url>
<url><loc>https://scifaro.com/en/abs/phonetic-richness-for-improved-automatic-speaker-verification-2407.08017</loc><lastmod>2024-07-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonetic-richness-for-improved-automatic-speaker-verification-2407.08017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonetic-richness-for-improved-automatic-speaker-verification-2407.08017"/></url>
<url><loc>https://scifaro.com/en/abs/from-modular-to-end-to-end-speaker-diarization-2407.08752</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-modular-to-end-to-end-speaker-diarization-2407.08752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-modular-to-end-to-end-speaker-diarization-2407.08752"/></url>
<url><loc>https://scifaro.com/en/abs/diff-mst-differentiable-mixing-style-transfer-2407.08889</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-mst-differentiable-mixing-style-transfer-2407.08889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-mst-differentiable-mixing-style-transfer-2407.08889"/></url>
<url><loc>https://scifaro.com/en/abs/optimization-of-dnn-based-speaker-verification-model-through-efficient-quantization-technique-2407.08991</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimization-of-dnn-based-speaker-verification-model-through-efficient-quantization-technique-2407.08991"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimization-of-dnn-based-speaker-verification-model-through-efficient-quantization-technique-2407.08991"/></url>
<url><loc>https://scifaro.com/en/abs/squeeze-and-excite-resnet-conformers-for-sound-event-localization-detection-and-distance-estimation-for-dcase-2024-challenge-2407.09021</loc><lastmod>2024-07-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/squeeze-and-excite-resnet-conformers-for-sound-event-localization-detection-and-distance-estimation-for-dcase-2024-challenge-2407.09021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/squeeze-and-excite-resnet-conformers-for-sound-event-localization-detection-and-distance-estimation-for-dcase-2024-challenge-2407.09021"/></url>
<url><loc>https://scifaro.com/en/abs/speech-slytherin-examining-the-performance-and-efficiency-of-mamba-for-speech-separation-recognition-and-synthesis-2407.09732</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-slytherin-examining-the-performance-and-efficiency-of-mamba-for-speech-separation-recognition-and-synthesis-2407.09732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-slytherin-examining-the-performance-and-efficiency-of-mamba-for-speech-separation-recognition-and-synthesis-2407.09732"/></url>
<url><loc>https://scifaro.com/en/abs/cuside-array-a-streaming-multi-channel-end-to-end-speech-recognition-system-with-realistic-evaluations-2407.09807</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuside-array-a-streaming-multi-channel-end-to-end-speech-recognition-system-with-realistic-evaluations-2407.09807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuside-array-a-streaming-multi-channel-end-to-end-speech-recognition-system-with-realistic-evaluations-2407.09807"/></url>
<url><loc>https://scifaro.com/en/abs/speech-copilot-leveraging-large-language-models-for-speech-processing-via-task-decomposition-modularization-and-program-generation-2407.09886</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-copilot-leveraging-large-language-models-for-speech-processing-via-task-decomposition-modularization-and-program-generation-2407.09886"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-copilot-leveraging-large-language-models-for-speech-processing-via-task-decomposition-modularization-and-program-generation-2407.09886"/></url>
<url><loc>https://scifaro.com/en/abs/the-feasibility-of-sound-zone-control-using-an-array-of-parametric-array-loudspeakers-2407.10054</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-feasibility-of-sound-zone-control-using-an-array-of-parametric-array-loudspeakers-2407.10054"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-feasibility-of-sound-zone-control-using-an-array-of-parametric-array-loudspeakers-2407.10054"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-continual-learning-for-robust-deepfake-audio-classification-2407.10108</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-continual-learning-for-robust-deepfake-audio-classification-2407.10108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-continual-learning-for-robust-deepfake-audio-classification-2407.10108"/></url>
<url><loc>https://scifaro.com/en/abs/improving-neural-biasing-for-contextual-speech-recognition-by-early-context-injection-and-text-perturbation-2407.10303</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-neural-biasing-for-contextual-speech-recognition-by-early-context-injection-and-text-perturbation-2407.10303"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-neural-biasing-for-contextual-speech-recognition-by-early-context-injection-and-text-perturbation-2407.10303"/></url>
<url><loc>https://scifaro.com/en/abs/leave-no-knowledge-behind-during-knowledge-distillation-towards-practical-and-effective-knowledge-distillation-for-code-switching-asr-using-realistic-data-2407.10603</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leave-no-knowledge-behind-during-knowledge-distillation-towards-practical-and-effective-knowledge-distillation-for-code-switching-asr-using-realistic-data-2407.10603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leave-no-knowledge-behind-during-knowledge-distillation-towards-practical-and-effective-knowledge-distillation-for-code-switching-asr-using-realistic-data-2407.10603"/></url>
<url><loc>https://scifaro.com/en/abs/qwen2-audio-technical-report-2407.10759</loc><lastmod>2024-07-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qwen2-audio-technical-report-2407.10759"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qwen2-audio-technical-report-2407.10759"/></url>
<url><loc>https://scifaro.com/en/abs/team-hyu-asml-robovox-sp-cup-2024-system-description-2407.11365</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/team-hyu-asml-robovox-sp-cup-2024-system-description-2407.11365"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/team-hyu-asml-robovox-sp-cup-2024-system-description-2407.11365"/></url>
<url><loc>https://scifaro.com/en/abs/voxblink2-a-100k-speaker-recognition-corpus-and-the-open-set-speaker-identification-benchmark-2407.11510</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxblink2-a-100k-speaker-recognition-corpus-and-the-open-set-speaker-identification-benchmark-2407.11510"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxblink2-a-100k-speaker-recognition-corpus-and-the-open-set-speaker-identification-benchmark-2407.11510"/></url>
<url><loc>https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-progress-and-perspectives-in-voice-anonymisation-2407.11516</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-progress-and-perspectives-in-voice-anonymisation-2407.11516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-voiceprivacy-2022-challenge-progress-and-perspectives-in-voice-anonymisation-2407.11516"/></url>
<url><loc>https://scifaro.com/en/abs/musa-multi-lingual-speaker-anonymization-via-serial-disentanglement-2407.11629</loc><lastmod>2024-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musa-multi-lingual-speaker-anonymization-via-serial-disentanglement-2407.11629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musa-multi-lingual-speaker-anonymization-via-serial-disentanglement-2407.11629"/></url>
<url><loc>https://scifaro.com/en/abs/universal-sound-separation-with-self-supervised-audio-masked-autoencoder-2407.11745</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-sound-separation-with-self-supervised-audio-masked-autoencoder-2407.11745"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-sound-separation-with-self-supervised-audio-masked-autoencoder-2407.11745"/></url>
<url><loc>https://scifaro.com/en/abs/vibravox-a-dataset-of-french-speech-captured-with-body-conduction-audio-sensors-2407.11828</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vibravox-a-dataset-of-french-speech-captured-with-body-conduction-audio-sensors-2407.11828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vibravox-a-dataset-of-french-speech-captured-with-body-conduction-audio-sensors-2407.11828"/></url>
<url><loc>https://scifaro.com/en/abs/icagc-2024-inspirational-and-convincing-audio-generation-challenge-2024-2407.12038</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/icagc-2024-inspirational-and-convincing-audio-generation-challenge-2024-2407.12038"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/icagc-2024-inspirational-and-convincing-audio-generation-challenge-2024-2407.12038"/></url>
<url><loc>https://scifaro.com/en/abs/semantic-communication-for-the-internet-of-sounds-architecture-design-principles-and-challenges-2407.12203</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semantic-communication-for-the-internet-of-sounds-architecture-design-principles-and-challenges-2407.12203"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semantic-communication-for-the-internet-of-sounds-architecture-design-principles-and-challenges-2407.12203"/></url>
<url><loc>https://scifaro.com/en/abs/laugh-now-cry-later-controlling-time-varying-emotional-states-of-flow-matching-based-zero-shot-text-to-speech-2407.12229</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/laugh-now-cry-later-controlling-time-varying-emotional-states-of-flow-matching-based-zero-shot-text-to-speech-2407.12229"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/laugh-now-cry-later-controlling-time-varying-emotional-states-of-flow-matching-based-zero-shot-text-to-speech-2407.12229"/></url>
<url><loc>https://scifaro.com/en/abs/pcq-emotion-recognition-in-speech-via-progressive-channel-querying-2407.12380</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pcq-emotion-recognition-in-speech-via-progressive-channel-querying-2407.12380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pcq-emotion-recognition-in-speech-via-progressive-channel-querying-2407.12380"/></url>
<url><loc>https://scifaro.com/en/abs/bsc-upc-at-emospeech-iberlef2024-attention-pooling-for-emotion-recognition-2407.12467</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bsc-upc-at-emospeech-iberlef2024-attention-pooling-for-emotion-recognition-2407.12467"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bsc-upc-at-emospeech-iberlef2024-attention-pooling-for-emotion-recognition-2407.12467"/></url>
<url><loc>https://scifaro.com/en/abs/ttsds-text-to-speech-distribution-score-2407.12707</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ttsds-text-to-speech-distribution-score-2407.12707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ttsds-text-to-speech-distribution-score-2407.12707"/></url>
<url><loc>https://scifaro.com/en/abs/taltech-irit-lis-speaker-and-language-diarization-systems-for-displace-2024-2407.12743</loc><lastmod>2024-07-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taltech-irit-lis-speaker-and-language-diarization-systems-for-displace-2024-2407.12743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taltech-irit-lis-speaker-and-language-diarization-systems-for-displace-2024-2407.12743"/></url>
<url><loc>https://scifaro.com/en/abs/multi-iteration-multi-stage-fine-tuning-of-transformers-for-sound-event-detection-with-heterogeneous-datasets-2407.12997</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-iteration-multi-stage-fine-tuning-of-transformers-for-sound-event-detection-with-heterogeneous-datasets-2407.12997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-iteration-multi-stage-fine-tuning-of-transformers-for-sound-event-detection-with-heterogeneous-datasets-2407.12997"/></url>
<url><loc>https://scifaro.com/en/abs/medic-zero-shot-music-editing-with-disentangled-inversion-control-2407.13220</loc><lastmod>2025-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/medic-zero-shot-music-editing-with-disentangled-inversion-control-2407.13220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/medic-zero-shot-music-editing-with-disentangled-inversion-control-2407.13220"/></url>
<url><loc>https://scifaro.com/en/abs/fade-in-reverberation-in-multi-room-environments-using-the-common-slope-model-2407.13242</loc><lastmod>2024-07-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fade-in-reverberation-in-multi-room-environments-using-the-common-slope-model-2407.13242"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fade-in-reverberation-in-multi-room-environments-using-the-common-slope-model-2407.13242"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-asr-models-and-features-for-dysarthric-and-elderly-speech-recognition-2407.13782</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-asr-models-and-features-for-dysarthric-and-elderly-speech-recognition-2407.13782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-asr-models-and-features-for-dysarthric-and-elderly-speech-recognition-2407.13782"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-contrastive-learning-of-musical-representations-2407.13840</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-contrastive-learning-of-musical-representations-2407.13840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-contrastive-learning-of-musical-representations-2407.13840"/></url>
<url><loc>https://scifaro.com/en/abs/improving-the-robustness-and-clinical-applicability-of-automatic-respiratory-sound-classification-using-deep-learning-based-audio-enhancement-algorithm-development-and-validation-2407.13895</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-the-robustness-and-clinical-applicability-of-automatic-respiratory-sound-classification-using-deep-learning-based-audio-enhancement-algorithm-development-and-validation-2407.13895"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-the-robustness-and-clinical-applicability-of-automatic-respiratory-sound-classification-using-deep-learning-based-audio-enhancement-algorithm-development-and-validation-2407.13895"/></url>
<url><loc>https://scifaro.com/en/abs/mscenespeech-a-multi-scene-speech-dataset-for-expressive-speech-synthesis-2407.14006</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mscenespeech-a-multi-scene-speech-dataset-for-expressive-speech-synthesis-2407.14006"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mscenespeech-a-multi-scene-speech-dataset-for-expressive-speech-synthesis-2407.14006"/></url>
<url><loc>https://scifaro.com/en/abs/ge2e-ac-generalized-end-to-end-loss-training-for-accent-classification-2407.14021</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ge2e-ac-generalized-end-to-end-loss-training-for-accent-classification-2407.14021"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ge2e-ac-generalized-end-to-end-loss-training-for-accent-classification-2407.14021"/></url>
<url><loc>https://scifaro.com/en/abs/wideband-relative-transfer-function-rtf-estimation-exploiting-frequency-correlations-2407.14152</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wideband-relative-transfer-function-rtf-estimation-exploiting-frequency-correlations-2407.14152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wideband-relative-transfer-function-rtf-estimation-exploiting-frequency-correlations-2407.14152"/></url>
<url><loc>https://scifaro.com/en/abs/topology-independent-gevd-based-distributed-adaptive-node-specific-signal-estimation-in-ad-hoc-wireless-acoustic-sensor-networks-2407.14172</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/topology-independent-gevd-based-distributed-adaptive-node-specific-signal-estimation-in-ad-hoc-wireless-acoustic-sensor-networks-2407.14172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/topology-independent-gevd-based-distributed-adaptive-node-specific-signal-estimation-in-ad-hoc-wireless-acoustic-sensor-networks-2407.14172"/></url>
<url><loc>https://scifaro.com/en/abs/polysinger-singing-voice-to-singing-voice-translation-from-english-to-japanese-2407.14399</loc><lastmod>2024-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/polysinger-singing-voice-to-singing-voice-translation-from-english-to-japanese-2407.14399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/polysinger-singing-voice-to-singing-voice-translation-from-english-to-japanese-2407.14399"/></url>
<url><loc>https://scifaro.com/en/abs/multi-label-audio-classification-with-a-noisy-zero-shot-teacher-2407.14712</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-label-audio-classification-with-a-noisy-zero-shot-teacher-2407.14712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-label-audio-classification-with-a-noisy-zero-shot-teacher-2407.14712"/></url>
<url><loc>https://scifaro.com/en/abs/towards-realistic-emotional-voice-conversion-using-controllable-emotional-intensity-2407.14800</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-realistic-emotional-voice-conversion-using-controllable-emotional-intensity-2407.14800"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-realistic-emotional-voice-conversion-using-controllable-emotional-intensity-2407.14800"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-speaker-modeling-and-its-applications-from-the-lens-of-deep-speaker-representation-learning-2407.15188</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-speaker-modeling-and-its-applications-from-the-lens-of-deep-speaker-representation-learning-2407.15188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-speaker-modeling-and-its-applications-from-the-lens-of-deep-speaker-representation-learning-2407.15188"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-ip-broadcasting-with-audio-tags-workflow-and-challenges-2407.15423</loc><lastmod>2025-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-ip-broadcasting-with-audio-tags-workflow-and-challenges-2407.15423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-ip-broadcasting-with-audio-tags-workflow-and-challenges-2407.15423"/></url>
<url><loc>https://scifaro.com/en/abs/emo-codec-an-in-depth-look-at-emotion-preservation-capacity-of-legacy-and-neural-codec-models-with-subjective-and-objective-evaluations-2407.15458</loc><lastmod>2024-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-codec-an-in-depth-look-at-emotion-preservation-capacity-of-legacy-and-neural-codec-models-with-subjective-and-objective-evaluations-2407.15458"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-codec-an-in-depth-look-at-emotion-preservation-capacity-of-legacy-and-neural-codec-models-with-subjective-and-objective-evaluations-2407.15458"/></url>
<url><loc>https://scifaro.com/en/abs/dsp-informed-bandwidth-extension-using-locally-conditioned-excitation-and-linear-time-varying-filter-subnetworks-2407.15624</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dsp-informed-bandwidth-extension-using-locally-conditioned-excitation-and-linear-time-varying-filter-subnetworks-2407.15624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dsp-informed-bandwidth-extension-using-locally-conditioned-excitation-and-linear-time-varying-filter-subnetworks-2407.15624"/></url>
<url><loc>https://scifaro.com/en/abs/generating-sample-based-musical-instruments-using-neural-audio-codec-language-models-2407.15641</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-sample-based-musical-instruments-using-neural-audio-codec-language-models-2407.15641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-sample-based-musical-instruments-using-neural-audio-codec-language-models-2407.15641"/></url>
<url><loc>https://scifaro.com/en/abs/robustness-of-speech-separation-models-for-similar-pitch-speakers-2407.15749</loc><lastmod>2024-07-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robustness-of-speech-separation-models-for-similar-pitch-speakers-2407.15749"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robustness-of-speech-separation-models-for-similar-pitch-speakers-2407.15749"/></url>
<url><loc>https://scifaro.com/en/abs/schr-odinger-bridge-for-generative-speech-enhancement-2407.16074</loc><lastmod>2024-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/schr-odinger-bridge-for-generative-speech-enhancement-2407.16074"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/schr-odinger-bridge-for-generative-speech-enhancement-2407.16074"/></url>
<url><loc>https://scifaro.com/en/abs/the-chime-8-dasr-challenge-for-generalizable-and-array-agnostic-distant-automatic-speech-recognition-and-diarization-2407.16447</loc><lastmod>2024-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-chime-8-dasr-challenge-for-generalizable-and-array-agnostic-distant-automatic-speech-recognition-and-diarization-2407.16447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-chime-8-dasr-challenge-for-generalizable-and-array-agnostic-distant-automatic-speech-recognition-and-diarization-2407.16447"/></url>
<url><loc>https://scifaro.com/en/abs/synthesizer-sound-matching-using-audio-spectrogram-transformers-2407.16643</loc><lastmod>2024-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthesizer-sound-matching-using-audio-spectrogram-transformers-2407.16643"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthesizer-sound-matching-using-audio-spectrogram-transformers-2407.16643"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-equalization-for-individual-instrument-tracks-using-convolutional-neural-networks-2407.16691</loc><lastmod>2024-07-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-equalization-for-individual-instrument-tracks-using-convolutional-neural-networks-2407.16691"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-equalization-for-individual-instrument-tracks-using-convolutional-neural-networks-2407.16691"/></url>
<url><loc>https://scifaro.com/en/abs/synth4kws-synthesized-speech-for-user-defined-keyword-spotting-in-low-resource-environments-2407.16840</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synth4kws-synthesized-speech-for-user-defined-keyword-spotting-in-low-resource-environments-2407.16840"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synth4kws-synthesized-speech-for-user-defined-keyword-spotting-in-low-resource-environments-2407.16840"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-based-ensemble-learning-for-speech-classification-2407.17009</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-based-ensemble-learning-for-speech-classification-2407.17009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-based-ensemble-learning-for-speech-classification-2407.17009"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-detection-and-annotation-of-sperm-whale-codas-2407.17119</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-detection-and-annotation-of-sperm-whale-codas-2407.17119"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-detection-and-annotation-of-sperm-whale-codas-2407.17119"/></url>
<url><loc>https://scifaro.com/en/abs/reduction-of-nonlinear-distortion-in-condenser-microphones-using-a-simple-post-processing-technique-2407.17250</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduction-of-nonlinear-distortion-in-condenser-microphones-using-a-simple-post-processing-technique-2407.17250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduction-of-nonlinear-distortion-in-condenser-microphones-using-a-simple-post-processing-technique-2407.17250"/></url>
<url><loc>https://scifaro.com/en/abs/explaining-spectrograms-in-machine-learning-a-study-on-neural-networks-for-speech-classification-2407.17416</loc><lastmod>2024-07-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explaining-spectrograms-in-machine-learning-a-study-on-neural-networks-for-speech-classification-2407.17416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explaining-spectrograms-in-machine-learning-a-study-on-neural-networks-for-speech-classification-2407.17416"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-review-and-taxonomy-of-audio-visual-synchronization-techniques-for-realistic-speech-animation-2407.17430</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-review-and-taxonomy-of-audio-visual-synchronization-techniques-for-realistic-speech-animation-2407.17430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-review-and-taxonomy-of-audio-visual-synchronization-techniques-for-realistic-speech-animation-2407.17430"/></url>
<url><loc>https://scifaro.com/en/abs/multi-stage-face-voice-association-learning-with-keynote-speaker-diarization-2407.17902</loc><lastmod>2024-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-stage-face-voice-association-learning-with-keynote-speaker-diarization-2407.17902"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-stage-face-voice-association-learning-with-keynote-speaker-diarization-2407.17902"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-manatee-vocalisations-using-the-audio-spectrogram-transformer-2407.18083</loc><lastmod>2024-07-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-manatee-vocalisations-using-the-audio-spectrogram-transformer-2407.18083"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-manatee-vocalisations-using-the-audio-spectrogram-transformer-2407.18083"/></url>
<url><loc>https://scifaro.com/en/abs/reshape-dimensions-network-for-speaker-recognition-2407.18223</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reshape-dimensions-network-for-speaker-recognition-2407.18223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reshape-dimensions-network-for-speaker-recognition-2407.18223"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-speech-unit-selection-for-textless-speech-to-speech-translation-2407.18332</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-speech-unit-selection-for-textless-speech-to-speech-translation-2407.18332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-speech-unit-selection-for-textless-speech-to-speech-translation-2407.18332"/></url>
<url><loc>https://scifaro.com/en/abs/matlab-based-epoch-extraction-for-speaker-differentiation-2407.18447</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/matlab-based-epoch-extraction-for-speaker-differentiation-2407.18447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/matlab-based-epoch-extraction-for-speaker-differentiation-2407.18447"/></url>
<url><loc>https://scifaro.com/en/abs/voxsim-a-perceptual-voice-similarity-dataset-2407.18505</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxsim-a-perceptual-voice-similarity-dataset-2407.18505"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxsim-a-perceptual-voice-similarity-dataset-2407.18505"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-posture-control-in-speech-motor-models-a-parallel-structured-simulation-approach-2407.18516</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-posture-control-in-speech-motor-models-a-parallel-structured-simulation-approach-2407.18516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-posture-control-in-speech-motor-models-a-parallel-structured-simulation-approach-2407.18516"/></url>
<url><loc>https://scifaro.com/en/abs/a-physics-informed-neural-network-based-approach-for-the-spatial-upsampling-of-spherical-microphone-arrays-2407.18732</loc><lastmod>2024-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-physics-informed-neural-network-based-approach-for-the-spatial-upsampling-of-spherical-microphone-arrays-2407.18732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-physics-informed-neural-network-based-approach-for-the-spatial-upsampling-of-spherical-microphone-arrays-2407.18732"/></url>
<url><loc>https://scifaro.com/en/abs/voxmed-one-step-respiratory-disease-classifier-using-digital-stethoscope-sounds-2407.18926</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voxmed-one-step-respiratory-disease-classifier-using-digital-stethoscope-sounds-2407.18926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voxmed-one-step-respiratory-disease-classifier-using-digital-stethoscope-sounds-2407.18926"/></url>
<url><loc>https://scifaro.com/en/abs/asgir-audio-spectrogram-transformer-guided-classification-and-information-retrieval-for-birds-2407.18927</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asgir-audio-spectrogram-transformer-guided-classification-and-information-retrieval-for-birds-2407.18927"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asgir-audio-spectrogram-transformer-guided-classification-and-information-retrieval-for-birds-2407.18927"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-encoder-size-based-on-data-driven-layer-wise-pruning-for-speech-recognition-2407.18930</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-encoder-size-based-on-data-driven-layer-wise-pruning-for-speech-recognition-2407.18930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-encoder-size-based-on-data-driven-layer-wise-pruning-for-speech-recognition-2407.18930"/></url>
<url><loc>https://scifaro.com/en/abs/ctpulse-close-talk-and-pseudo-label-based-far-field-speech-enhancement-2407.19485</loc><lastmod>2025-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctpulse-close-talk-and-pseudo-label-based-far-field-speech-enhancement-2407.19485"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctpulse-close-talk-and-pseudo-label-based-far-field-speech-enhancement-2407.19485"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-channel-attention-network-for-small-footprint-noisy-spoken-keyword-spotting-2407.19834</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-channel-attention-network-for-small-footprint-noisy-spoken-keyword-spotting-2407.19834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-channel-attention-network-for-small-footprint-noisy-spoken-keyword-spotting-2407.19834"/></url>
<url><loc>https://scifaro.com/en/abs/blind-acoustic-parameter-estimation-through-task-agnostic-embeddings-using-latent-approximations-2407.19989</loc><lastmod>2024-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-acoustic-parameter-estimation-through-task-agnostic-embeddings-using-latent-approximations-2407.19989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-acoustic-parameter-estimation-through-task-agnostic-embeddings-using-latent-approximations-2407.19989"/></url>
<url><loc>https://scifaro.com/en/abs/t-bar-a-lagen-a-system-for-automatic-t-bar-a-la-identification-and-generation-2407.20935</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/t-bar-a-lagen-a-system-for-automatic-t-bar-a-la-identification-and-generation-2407.20935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/t-bar-a-lagen-a-system-for-automatic-t-bar-a-la-identification-and-generation-2407.20935"/></url>
<url><loc>https://scifaro.com/en/abs/cluster-and-separate-a-gnn-approach-to-voice-and-staff-prediction-for-score-engraving-2407.21030</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cluster-and-separate-a-gnn-approach-to-voice-and-staff-prediction-for-score-engraving-2407.21030"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cluster-and-separate-a-gnn-approach-to-voice-and-staff-prediction-for-score-engraving-2407.21030"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-self-supervised-models-for-automatic-whispered-speech-recognition-2407.21211</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-self-supervised-models-for-automatic-whispered-speech-recognition-2407.21211"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-self-supervised-models-for-automatic-whispered-speech-recognition-2407.21211"/></url>
<url><loc>https://scifaro.com/en/abs/towards-emg-to-speech-with-a-necklace-form-factor-2407.21345</loc><lastmod>2024-08-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-emg-to-speech-with-a-necklace-form-factor-2407.21345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-emg-to-speech-with-a-necklace-form-factor-2407.21345"/></url>
<url><loc>https://scifaro.com/en/abs/towards-interfacing-large-language-models-with-asr-systems-using-confidence-measures-and-prompting-2407.21414</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-interfacing-large-language-models-with-asr-systems-using-confidence-measures-and-prompting-2407.21414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-interfacing-large-language-models-with-asr-systems-using-confidence-measures-and-prompting-2407.21414"/></url>
<url><loc>https://scifaro.com/en/abs/handling-numeric-expressions-in-automatic-speech-recognition-2408.00004</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/handling-numeric-expressions-in-automatic-speech-recognition-2408.00004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/handling-numeric-expressions-in-automatic-speech-recognition-2408.00004"/></url>
<url><loc>https://scifaro.com/en/abs/framework-for-curating-speech-datasets-and-evaluating-asr-systems-a-case-study-for-polish-2408.00005</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/framework-for-curating-speech-datasets-and-evaluating-asr-systems-a-case-study-for-polish-2408.00005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/framework-for-curating-speech-datasets-and-evaluating-asr-systems-a-case-study-for-polish-2408.00005"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-universal-method-for-meaningful-signal-detection-2408.00016</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-universal-method-for-meaningful-signal-detection-2408.00016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-universal-method-for-meaningful-signal-detection-2408.00016"/></url>
<url><loc>https://scifaro.com/en/abs/long-term-conversation-analysis-privacy-utility-trade-off-under-noise-and-reverberation-2408.00382</loc><lastmod>2026-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/long-term-conversation-analysis-privacy-utility-trade-off-under-noise-and-reverberation-2408.00382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/long-term-conversation-analysis-privacy-utility-trade-off-under-noise-and-reverberation-2408.00382"/></url>
<url><loc>https://scifaro.com/en/abs/syneslm-a-unified-approach-for-audio-visual-speech-recognition-and-translation-via-language-model-and-synthetic-data-2408.00624</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/syneslm-a-unified-approach-for-audio-visual-speech-recognition-and-translation-via-language-model-and-synthetic-data-2408.00624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/syneslm-a-unified-approach-for-audio-visual-speech-recognition-and-translation-via-language-model-and-synthetic-data-2408.00624"/></url>
<url><loc>https://scifaro.com/en/abs/concerns-for-self-localization-of-ad-hoc-arrays-using-time-difference-of-arrivals-2408.00732</loc><lastmod>2024-08-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concerns-for-self-localization-of-ad-hoc-arrays-using-time-difference-of-arrivals-2408.00732"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concerns-for-self-localization-of-ad-hoc-arrays-using-time-difference-of-arrivals-2408.00732"/></url>
<url><loc>https://scifaro.com/en/abs/improving-audio-spectrogram-transformers-for-sound-event-detection-through-multi-stage-training-2408.00791</loc><lastmod>2024-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-audio-spectrogram-transformers-for-sound-event-detection-through-multi-stage-training-2408.00791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-audio-spectrogram-transformers-for-sound-event-detection-through-multi-stage-training-2408.00791"/></url>
<url><loc>https://scifaro.com/en/abs/re-enact-reinforcement-learning-for-emotional-speech-generation-using-actor-critic-strategy-2408.01892</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/re-enact-reinforcement-learning-for-emotional-speech-generation-using-actor-critic-strategy-2408.01892"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/re-enact-reinforcement-learning-for-emotional-speech-generation-using-actor-critic-strategy-2408.01892"/></url>
<url><loc>https://scifaro.com/en/abs/streamvoice-evolving-into-end-to-end-streaming-zero-shot-voice-conversion-2408.02178</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streamvoice-evolving-into-end-to-end-streaming-zero-shot-voice-conversion-2408.02178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streamvoice-evolving-into-end-to-end-streaming-zero-shot-voice-conversion-2408.02178"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-orthography-automatic-recovery-of-short-vowels-and-dialectal-sounds-in-arabic-2408.02430</loc><lastmod>2024-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-orthography-automatic-recovery-of-short-vowels-and-dialectal-sounds-in-arabic-2408.02430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-orthography-automatic-recovery-of-short-vowels-and-dialectal-sounds-in-arabic-2408.02430"/></url>
<url><loc>https://scifaro.com/en/abs/enhanced-reverberation-as-supervision-for-unsupervised-speech-separation-2408.03438</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhanced-reverberation-as-supervision-for-unsupervised-speech-separation-2408.03438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhanced-reverberation-as-supervision-for-unsupervised-speech-separation-2408.03438"/></url>
<url><loc>https://scifaro.com/en/abs/tf-locoformer-transformer-with-local-modeling-by-convolution-for-speech-separation-and-enhancement-2408.03440</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tf-locoformer-transformer-with-local-modeling-by-convolution-for-speech-separation-and-enhancement-2408.03440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tf-locoformer-transformer-with-local-modeling-by-convolution-for-speech-separation-and-enhancement-2408.03440"/></url>
<url><loc>https://scifaro.com/en/abs/design-and-analysis-of-binaural-signal-matching-with-arbitrary-microphone-arrays-and-listener-head-rotations-2408.03581</loc><lastmod>2025-04-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/design-and-analysis-of-binaural-signal-matching-with-arbitrary-microphone-arrays-and-listener-head-rotations-2408.03581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/design-and-analysis-of-binaural-signal-matching-with-arbitrary-microphone-arrays-and-listener-head-rotations-2408.03581"/></url>
<url><loc>https://scifaro.com/en/abs/facing-the-music-tackling-singing-voice-separation-in-cinematic-audio-source-separation-2408.03588</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/facing-the-music-tackling-singing-voice-separation-in-cinematic-audio-source-separation-2408.03588"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/facing-the-music-tackling-singing-voice-separation-in-cinematic-audio-source-separation-2408.03588"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-gap-between-audio-and-text-using-parallel-attention-for-user-defined-keyword-spotting-2408.03593</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-gap-between-audio-and-text-using-parallel-attention-for-user-defined-keyword-spotting-2408.03593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-gap-between-audio-and-text-using-parallel-attention-for-user-defined-keyword-spotting-2408.03593"/></url>
<url><loc>https://scifaro.com/en/abs/feasibility-of-imagls-bsm-ild-informed-binaural-signal-matching-with-arbitrary-microphone-arrays-2408.03611</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feasibility-of-imagls-bsm-ild-informed-binaural-signal-matching-with-arbitrary-microphone-arrays-2408.03611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feasibility-of-imagls-bsm-ild-informed-binaural-signal-matching-with-arbitrary-microphone-arrays-2408.03611"/></url>
<url><loc>https://scifaro.com/en/abs/one-shot-distributed-node-specific-signal-estimation-with-non-overlapping-latent-subspaces-in-acoustic-sensor-networks-2408.03752</loc><lastmod>2024-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/one-shot-distributed-node-specific-signal-estimation-with-non-overlapping-latent-subspaces-in-acoustic-sensor-networks-2408.03752"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/one-shot-distributed-node-specific-signal-estimation-with-non-overlapping-latent-subspaces-in-acoustic-sensor-networks-2408.03752"/></url>
<url><loc>https://scifaro.com/en/abs/speech-privacy-preserving-methods-using-secret-key-for-convolutional-neural-network-models-and-their-robustness-evaluation-2408.03897</loc><lastmod>2024-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-privacy-preserving-methods-using-secret-key-for-convolutional-neural-network-models-and-their-robustness-evaluation-2408.03897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-privacy-preserving-methods-using-secret-key-for-convolutional-neural-network-models-and-their-robustness-evaluation-2408.03897"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-the-potential-impact-of-direction-dependent-hrtf-selection-on-sound-localization-accuracy-2408.04288</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-the-potential-impact-of-direction-dependent-hrtf-selection-on-sound-localization-accuracy-2408.04288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-the-potential-impact-of-direction-dependent-hrtf-selection-on-sound-localization-accuracy-2408.04288"/></url>
<url><loc>https://scifaro.com/en/abs/preserving-spoken-content-in-voice-anonymisation-with-character-level-vocoder-conditioning-2408.04306</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/preserving-spoken-content-in-voice-anonymisation-with-character-level-vocoder-conditioning-2408.04306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/preserving-spoken-content-in-voice-anonymisation-with-character-level-vocoder-conditioning-2408.04306"/></url>
<url><loc>https://scifaro.com/en/abs/hydraformer-one-encoder-for-all-subsampling-rates-2408.04325</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hydraformer-one-encoder-for-all-subsampling-rates-2408.04325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hydraformer-one-encoder-for-all-subsampling-rates-2408.04325"/></url>
<url><loc>https://scifaro.com/en/abs/simulating-articulatory-trajectories-with-phonological-feature-interpolation-2408.04363</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simulating-articulatory-trajectories-with-phonological-feature-interpolation-2408.04363"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simulating-articulatory-trajectories-with-phonological-feature-interpolation-2408.04363"/></url>
<url><loc>https://scifaro.com/en/abs/articulatory-configurations-across-genders-and-periods-in-french-radio-and-tv-archives-2408.04519</loc><lastmod>2024-08-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/articulatory-configurations-across-genders-and-periods-in-french-radio-and-tv-archives-2408.04519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/articulatory-configurations-across-genders-and-periods-in-french-radio-and-tv-archives-2408.04519"/></url>
<url><loc>https://scifaro.com/en/abs/add-2023-towards-audio-deepfake-detection-and-analysis-in-the-wild-2408.04967</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/add-2023-towards-audio-deepfake-detection-and-analysis-in-the-wild-2408.04967"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/add-2023-towards-audio-deepfake-detection-and-analysis-in-the-wild-2408.04967"/></url>
<url><loc>https://scifaro.com/en/abs/improving-whisper-s-recognition-performance-for-under-represented-language-kazakh-leveraging-unpaired-speech-and-text-2408.05554</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-whisper-s-recognition-performance-for-under-represented-language-kazakh-leveraging-unpaired-speech-and-text-2408.05554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-whisper-s-recognition-performance-for-under-represented-language-kazakh-leveraging-unpaired-speech-and-text-2408.05554"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-quantitative-analysis-of-coarticulation-with-a-phoneme-to-articulatory-model-2408.05641</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-quantitative-analysis-of-coarticulation-with-a-phoneme-to-articulatory-model-2408.05641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-quantitative-analysis-of-coarticulation-with-a-phoneme-to-articulatory-model-2408.05641"/></url>
<url><loc>https://scifaro.com/en/abs/extracting-urban-sound-information-for-residential-areas-in-smart-cities-using-an-end-to-end-iot-system-2408.05721</loc><lastmod>2024-08-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extracting-urban-sound-information-for-residential-areas-in-smart-cities-using-an-end-to-end-iot-system-2408.05721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extracting-urban-sound-information-for-residential-areas-in-smart-cities-using-an-end-to-end-iot-system-2408.05721"/></url>
<url><loc>https://scifaro.com/en/abs/vq-ctap-cross-modal-fine-grained-sequence-representation-learning-for-speech-processing-2408.05758</loc><lastmod>2025-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vq-ctap-cross-modal-fine-grained-sequence-representation-learning-for-speech-processing-2408.05758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vq-ctap-cross-modal-fine-grained-sequence-representation-learning-for-speech-processing-2408.05758"/></url>
<url><loc>https://scifaro.com/en/abs/lyrics-transcription-for-humans-a-readability-aware-benchmark-2408.06370</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lyrics-transcription-for-humans-a-readability-aware-benchmark-2408.06370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lyrics-transcription-for-humans-a-readability-aware-benchmark-2408.06370"/></url>
<url><loc>https://scifaro.com/en/abs/present-zero-shot-text-to-prosody-control-2408.06827</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/present-zero-shot-text-to-prosody-control-2408.06827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/present-zero-shot-text-to-prosody-control-2408.06827"/></url>
<url><loc>https://scifaro.com/en/abs/bss-cffma-cross-domain-feature-fusion-and-multi-attention-speech-enhancement-network-based-on-self-supervised-embedding-2408.06851</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bss-cffma-cross-domain-feature-fusion-and-multi-attention-speech-enhancement-network-based-on-self-supervised-embedding-2408.06851"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bss-cffma-cross-domain-feature-fusion-and-multi-attention-speech-enhancement-network-based-on-self-supervised-embedding-2408.06851"/></url>
<url><loc>https://scifaro.com/en/abs/saslaw-dialogue-speech-corpus-with-audio-visual-egocentric-information-toward-environment-adaptive-dialogue-speech-synthesis-2408.06858</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/saslaw-dialogue-speech-corpus-with-audio-visual-egocentric-information-toward-environment-adaptive-dialogue-speech-synthesis-2408.06858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/saslaw-dialogue-speech-corpus-with-audio-visual-egocentric-information-toward-environment-adaptive-dialogue-speech-synthesis-2408.06858"/></url>
<url><loc>https://scifaro.com/en/abs/vnet-a-gan-based-multi-tier-discriminator-network-for-speech-synthesis-vocoders-2408.06906</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vnet-a-gan-based-multi-tier-discriminator-network-for-speech-synthesis-vocoders-2408.06906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vnet-a-gan-based-multi-tier-discriminator-network-for-speech-synthesis-vocoders-2408.06906"/></url>
<url><loc>https://scifaro.com/en/abs/heterogeneous-space-fusion-and-dual-dimension-attention-a-new-paradigm-for-speech-enhancement-2408.06911</loc><lastmod>2024-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/heterogeneous-space-fusion-and-dual-dimension-attention-a-new-paradigm-for-speech-enhancement-2408.06911"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/heterogeneous-space-fusion-and-dual-dimension-attention-a-new-paradigm-for-speech-enhancement-2408.06911"/></url>
<url><loc>https://scifaro.com/en/abs/direction-of-arrival-correction-through-speech-quality-feedback-2408.07234</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/direction-of-arrival-correction-through-speech-quality-feedback-2408.07234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/direction-of-arrival-correction-through-speech-quality-feedback-2408.07234"/></url>
<url><loc>https://scifaro.com/en/abs/morphfader-enabling-fine-grained-controllable-morphing-with-text-to-audio-models-2408.07260</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/morphfader-enabling-fine-grained-controllable-morphing-with-text-to-audio-models-2408.07260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/morphfader-enabling-fine-grained-controllable-morphing-with-text-to-audio-models-2408.07260"/></url>
<url><loc>https://scifaro.com/en/abs/wavlm-model-ensemble-for-audio-deepfake-detection-2408.07414</loc><lastmod>2024-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavlm-model-ensemble-for-audio-deepfake-detection-2408.07414"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavlm-model-ensemble-for-audio-deepfake-detection-2408.07414"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-blind-joint-dereverberation-and-room-acoustics-estimation-with-diffusion-models-2408.07472</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-blind-joint-dereverberation-and-room-acoustics-estimation-with-diffusion-models-2408.07472"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-blind-joint-dereverberation-and-room-acoustics-estimation-with-diffusion-models-2408.07472"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-multi-grained-alignment-for-contrastive-language-audio-pre-training-2408.07919</loc><lastmod>2024-08-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-multi-grained-alignment-for-contrastive-language-audio-pre-training-2408.07919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-multi-grained-alignment-for-contrastive-language-audio-pre-training-2408.07919"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-large-language-model-based-speech-recognition-by-contextualization-for-rare-and-ambiguous-words-2408.08027</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-large-language-model-based-speech-recognition-by-contextualization-for-rare-and-ambiguous-words-2408.08027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-large-language-model-based-speech-recognition-by-contextualization-for-rare-and-ambiguous-words-2408.08027"/></url>
<url><loc>https://scifaro.com/en/abs/concatenet-dialogue-separation-using-local-and-global-feature-concatenation-2408.08729</loc><lastmod>2024-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/concatenet-dialogue-separation-using-local-and-global-feature-concatenation-2408.08729"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/concatenet-dialogue-separation-using-local-and-global-feature-concatenation-2408.08729"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-5-crowdsourced-speech-data-deepfakes-and-adversarial-attacks-at-scale-2408.08739</loc><lastmod>2024-08-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-5-crowdsourced-speech-data-deepfakes-and-adversarial-attacks-at-scale-2408.08739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-5-crowdsourced-speech-data-deepfakes-and-adversarial-attacks-at-scale-2408.08739"/></url>
<url><loc>https://scifaro.com/en/abs/generating-data-with-text-to-speech-and-large-language-models-for-conversational-speech-recognition-2408.09215</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-data-with-text-to-speech-and-large-language-models-for-conversational-speech-recognition-2408.09215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-data-with-text-to-speech-and-large-language-models-for-conversational-speech-recognition-2408.09215"/></url>
<url><loc>https://scifaro.com/en/abs/malacopula-adversarial-automatic-speaker-verification-attacks-using-a-neural-based-generalised-hammerstein-model-2408.09300</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/malacopula-adversarial-automatic-speaker-verification-attacks-using-a-neural-based-generalised-hammerstein-model-2408.09300"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/malacopula-adversarial-automatic-speaker-verification-attacks-using-a-neural-based-generalised-hammerstein-model-2408.09300"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-area-based-and-speaker-agnostic-source-separation-2408.09810</loc><lastmod>2024-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-area-based-and-speaker-agnostic-source-separation-2408.09810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-area-based-and-speaker-agnostic-source-separation-2408.09810"/></url>
<url><loc>https://scifaro.com/en/abs/asasvicomtech-the-vicomtech-ugr-speech-deepfake-detection-and-sasv-systems-for-the-asvspoof5-challenge-2408.10361</loc><lastmod>2024-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asasvicomtech-the-vicomtech-ugr-speech-deepfake-detection-and-sasv-systems-for-the-asvspoof5-challenge-2408.10361"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asasvicomtech-the-vicomtech-ugr-speech-deepfake-detection-and-sasv-systems-for-the-asvspoof5-challenge-2408.10361"/></url>
<url><loc>https://scifaro.com/en/abs/knn-retrieval-for-simple-and-effective-zero-shot-multi-speaker-text-to-speech-2408.10771</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/knn-retrieval-for-simple-and-effective-zero-shot-multi-speaker-text-to-speech-2408.10771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/knn-retrieval-for-simple-and-effective-zero-shot-multi-speaker-text-to-speech-2408.10771"/></url>
<url><loc>https://scifaro.com/en/abs/improving-query-by-vocal-imitation-with-contrastive-learning-and-audio-pretraining-2408.11638</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-query-by-vocal-imitation-with-contrastive-learning-and-audio-pretraining-2408.11638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-query-by-vocal-imitation-with-contrastive-learning-and-audio-pretraining-2408.11638"/></url>
<url><loc>https://scifaro.com/en/abs/estimated-audio-caption-correspondences-improve-language-based-audio-retrieval-2408.11641</loc><lastmod>2024-08-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimated-audio-caption-correspondences-improve-language-based-audio-retrieval-2408.11641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimated-audio-caption-correspondences-improve-language-based-audio-retrieval-2408.11641"/></url>
<url><loc>https://scifaro.com/en/abs/non-causal-to-causal-ssl-supported-transfer-learning-towards-a-high-performance-low-latency-speech-vocoder-2408.11842</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-causal-to-causal-ssl-supported-transfer-learning-towards-a-high-performance-low-latency-speech-vocoder-2408.11842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-causal-to-causal-ssl-supported-transfer-learning-towards-a-high-performance-low-latency-speech-vocoder-2408.11842"/></url>
<url><loc>https://scifaro.com/en/abs/parameter-efficient-transfer-learning-under-federated-learning-for-automatic-speech-recognition-2408.11873</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-under-federated-learning-for-automatic-speech-recognition-2408.11873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/parameter-efficient-transfer-learning-under-federated-learning-for-automatic-speech-recognition-2408.11873"/></url>
<url><loc>https://scifaro.com/en/abs/the-whole-is-bigger-than-the-sum-of-its-parts-modeling-individual-annotators-to-capture-emotional-variability-2408.11956</loc><lastmod>2025-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-whole-is-bigger-than-the-sum-of-its-parts-modeling-individual-annotators-to-capture-emotional-variability-2408.11956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-whole-is-bigger-than-the-sum-of-its-parts-modeling-individual-annotators-to-capture-emotional-variability-2408.11956"/></url>
<url><loc>https://scifaro.com/en/abs/lcm-svc-latent-diffusion-model-based-singing-voice-conversion-with-inference-acceleration-via-latent-consistency-distillation-2408.12354</loc><lastmod>2024-08-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lcm-svc-latent-diffusion-model-based-singing-voice-conversion-with-inference-acceleration-via-latent-consistency-distillation-2408.12354"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lcm-svc-latent-diffusion-model-based-singing-voice-conversion-with-inference-acceleration-via-latent-consistency-distillation-2408.12354"/></url>
<url><loc>https://scifaro.com/en/abs/dynamic-gated-recurrent-neural-network-for-compute-efficient-speech-enhancement-2408.12425</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dynamic-gated-recurrent-neural-network-for-compute-efficient-speech-enhancement-2408.12425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dynamic-gated-recurrent-neural-network-for-compute-efficient-speech-enhancement-2408.12425"/></url>
<url><loc>https://scifaro.com/en/abs/inference-adaptive-neural-steering-for-real-time-area-based-sound-source-separation-2408.12982</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/inference-adaptive-neural-steering-for-real-time-area-based-sound-source-separation-2408.12982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/inference-adaptive-neural-steering-for-real-time-area-based-sound-source-separation-2408.12982"/></url>
<url><loc>https://scifaro.com/en/abs/speechprompt-prompting-speech-language-models-for-speech-processing-tasks-2408.13040</loc><lastmod>2024-08-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechprompt-prompting-speech-language-models-for-speech-processing-tasks-2408.13040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechprompt-prompting-speech-language-models-for-speech-processing-tasks-2408.13040"/></url>
<url><loc>https://scifaro.com/en/abs/as-biased-as-you-measure-methodological-pitfalls-of-bias-evaluations-in-speaker-verification-research-2408.13614</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/as-biased-as-you-measure-methodological-pitfalls-of-bias-evaluations-in-speaker-verification-research-2408.13614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/as-biased-as-you-measure-methodological-pitfalls-of-bias-evaluations-in-speaker-verification-research-2408.13614"/></url>
<url><loc>https://scifaro.com/en/abs/chirp-group-delay-based-onset-detection-in-instruments-with-fast-attack-2408.13734</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chirp-group-delay-based-onset-detection-in-instruments-with-fast-attack-2408.13734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chirp-group-delay-based-onset-detection-in-instruments-with-fast-attack-2408.13734"/></url>
<url><loc>https://scifaro.com/en/abs/literary-and-colloquial-tamil-dialect-identification-2408.13739</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/literary-and-colloquial-tamil-dialect-identification-2408.13739"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/literary-and-colloquial-tamil-dialect-identification-2408.13739"/></url>
<url><loc>https://scifaro.com/en/abs/quartered-spectral-envelope-and-1d-cnn-based-classification-of-normally-phonated-and-whispered-speech-2408.13746</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quartered-spectral-envelope-and-1d-cnn-based-classification-of-normally-phonated-and-whispered-speech-2408.13746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quartered-spectral-envelope-and-1d-cnn-based-classification-of-normally-phonated-and-whispered-speech-2408.13746"/></url>
<url><loc>https://scifaro.com/en/abs/combined-assessment-of-auditory-distance-perception-and-externalization-2408.14198</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/combined-assessment-of-auditory-distance-perception-and-externalization-2408.14198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/combined-assessment-of-auditory-distance-perception-and-externalization-2408.14198"/></url>
<url><loc>https://scifaro.com/en/abs/reduce-computational-complexity-for-continuous-wavelet-transform-in-acoustic-recognition-using-hop-size-2408.14302</loc><lastmod>2025-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reduce-computational-complexity-for-continuous-wavelet-transform-in-acoustic-recognition-using-hop-size-2408.14302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reduce-computational-complexity-for-continuous-wavelet-transform-in-acoustic-recognition-using-hop-size-2408.14302"/></url>
<url><loc>https://scifaro.com/en/abs/spoken-term-discovery-using-discrete-speech-units-2408.14390</loc><lastmod>2024-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoken-term-discovery-using-discrete-speech-units-2408.14390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoken-term-discovery-using-discrete-speech-units-2408.14390"/></url>
<url><loc>https://scifaro.com/en/abs/dualspeech-enhancing-speaker-fidelity-and-text-intelligibility-through-dual-classifier-free-guidance-2408.14423</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualspeech-enhancing-speaker-fidelity-and-text-intelligibility-through-dual-classifier-free-guidance-2408.14423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualspeech-enhancing-speaker-fidelity-and-text-intelligibility-through-dual-classifier-free-guidance-2408.14423"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-discriminative-deep-learning-based-noise-reduction-methods-in-low-snr-scenarios-2408.14582</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-discriminative-deep-learning-based-noise-reduction-methods-in-low-snr-scenarios-2408.14582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-discriminative-deep-learning-based-noise-reduction-methods-in-low-snr-scenarios-2408.14582"/></url>
<url><loc>https://scifaro.com/en/abs/is-audio-spoof-detection-robust-to-laundering-attacks-2408.14712</loc><lastmod>2025-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-audio-spoof-detection-robust-to-laundering-attacks-2408.14712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-audio-spoof-detection-robust-to-laundering-attacks-2408.14712"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-noisy-labels-on-sound-event-detection-deletion-errors-are-more-detrimental-than-insertion-errors-2408.14771</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-noisy-labels-on-sound-event-detection-deletion-errors-are-more-detrimental-than-insertion-errors-2408.14771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-noisy-labels-on-sound-event-detection-deletion-errors-are-more-detrimental-than-insertion-errors-2408.14771"/></url>
<url><loc>https://scifaro.com/en/abs/quartered-chirp-spectral-envelope-for-whispered-vs-normal-speech-classification-2408.14777</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quartered-chirp-spectral-envelope-for-whispered-vs-normal-speech-classification-2408.14777"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quartered-chirp-spectral-envelope-for-whispered-vs-normal-speech-classification-2408.14777"/></url>
<url><loc>https://scifaro.com/en/abs/maskcyclegan-based-whisper-to-normal-speech-conversion-2408.14797</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/maskcyclegan-based-whisper-to-normal-speech-conversion-2408.14797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/maskcyclegan-based-whisper-to-normal-speech-conversion-2408.14797"/></url>
<url><loc>https://scifaro.com/en/abs/similarity-metrics-for-late-reverberation-2408.14836</loc><lastmod>2025-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/similarity-metrics-for-late-reverberation-2408.14836"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/similarity-metrics-for-late-reverberation-2408.14836"/></url>
<url><loc>https://scifaro.com/en/abs/literary-and-colloquial-dialect-identification-for-tamil-using-acoustic-features-2408.14887</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/literary-and-colloquial-dialect-identification-for-tamil-using-acoustic-features-2408.14887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/literary-and-colloquial-dialect-identification-for-tamil-using-acoustic-features-2408.14887"/></url>
<url><loc>https://scifaro.com/en/abs/development-of-large-annotated-music-datasets-using-hmm-based-forced-viterbi-alignment-2408.14890</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-of-large-annotated-music-datasets-using-hmm-based-forced-viterbi-alignment-2408.14890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-of-large-annotated-music-datasets-using-hmm-based-forced-viterbi-alignment-2408.14890"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-continuous-and-binary-relevances-in-audio-text-relevance-learning-2408.14939</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-continuous-and-binary-relevances-in-audio-text-relevance-learning-2408.14939"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-continuous-and-binary-relevances-in-audio-text-relevance-learning-2408.14939"/></url>
<url><loc>https://scifaro.com/en/abs/infusing-acoustic-pause-context-into-text-based-dementia-assessment-2408.15188</loc><lastmod>2024-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/infusing-acoustic-pause-context-into-text-based-dementia-assessment-2408.15188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/infusing-acoustic-pause-context-into-text-based-dementia-assessment-2408.15188"/></url>
<url><loc>https://scifaro.com/en/abs/feature-representations-for-automatic-meerkat-vocalization-classification-2408.15296</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feature-representations-for-automatic-meerkat-vocalization-classification-2408.15296"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feature-representations-for-automatic-meerkat-vocalization-classification-2408.15296"/></url>
<url><loc>https://scifaro.com/en/abs/yolo-stutter-end-to-end-region-wise-speech-dysfluency-detection-2408.15297</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yolo-stutter-end-to-end-region-wise-speech-dysfluency-detection-2408.15297"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yolo-stutter-end-to-end-region-wise-speech-dysfluency-detection-2408.15297"/></url>
<url><loc>https://scifaro.com/en/abs/examining-the-interplay-between-privacy-and-fairness-for-speech-processing-a-review-and-perspective-2408.15391</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/examining-the-interplay-between-privacy-and-fairness-for-speech-processing-a-review-and-perspective-2408.15391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/examining-the-interplay-between-privacy-and-fairness-for-speech-processing-a-review-and-perspective-2408.15391"/></url>
<url><loc>https://scifaro.com/en/abs/drop-the-beat-freestyler-for-accompaniment-conditioned-rapping-voice-generation-2408.15474</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/drop-the-beat-freestyler-for-accompaniment-conditioned-rapping-voice-generation-2408.15474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/drop-the-beat-freestyler-for-accompaniment-conditioned-rapping-voice-generation-2408.15474"/></url>
<url><loc>https://scifaro.com/en/abs/noise-to-mask-ratio-loss-for-deep-neural-network-based-audio-watermarking-2408.15553</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-to-mask-ratio-loss-for-deep-neural-network-based-audio-watermarking-2408.15553"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-to-mask-ratio-loss-for-deep-neural-network-based-audio-watermarking-2408.15553"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-masking-with-explicit-time-context-windowing-for-neural-network-based-monaural-speech-enhancement-2408.15582</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-masking-with-explicit-time-context-windowing-for-neural-network-based-monaural-speech-enhancement-2408.15582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-masking-with-explicit-time-context-windowing-for-neural-network-based-monaural-speech-enhancement-2408.15582"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-approach-for-low-complexity-joint-acoustic-echo-and-noise-reduction-2408.15746</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-approach-for-low-complexity-joint-acoustic-echo-and-noise-reduction-2408.15746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-approach-for-low-complexity-joint-acoustic-echo-and-noise-reduction-2408.15746"/></url>
<url><loc>https://scifaro.com/en/abs/wav2pos-sound-source-localization-using-masked-autoencoders-2408.15771</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wav2pos-sound-source-localization-using-masked-autoencoders-2408.15771"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wav2pos-sound-source-localization-using-masked-autoencoders-2408.15771"/></url>
<url><loc>https://scifaro.com/en/abs/easy-interpretable-effective-opensmile-for-voice-deepfake-detection-2408.15775</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/easy-interpretable-effective-opensmile-for-voice-deepfake-detection-2408.15775"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/easy-interpretable-effective-opensmile-for-voice-deepfake-detection-2408.15775"/></url>
<url><loc>https://scifaro.com/en/abs/modalitymirror-improving-audio-classification-in-modality-heterogeneity-federated-learning-with-multimodal-distillation-2408.15803</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modalitymirror-improving-audio-classification-in-modality-heterogeneity-federated-learning-with-multimodal-distillation-2408.15803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modalitymirror-improving-audio-classification-in-modality-heterogeneity-federated-learning-with-multimodal-distillation-2408.15803"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-robust-speaker-verification-using-parallel-embedding-fusion-btu-speech-group-s-approach-for-asvspoof5-challenge-2408.15877</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-robust-speaker-verification-using-parallel-embedding-fusion-btu-speech-group-s-approach-for-asvspoof5-challenge-2408.15877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-robust-speaker-verification-using-parallel-embedding-fusion-btu-speech-group-s-approach-for-asvspoof5-challenge-2408.15877"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-adversarial-training-for-zero-shot-voice-cloning-2408.15916</loc><lastmod>2024-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-adversarial-training-for-zero-shot-voice-cloning-2408.15916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-adversarial-training-for-zero-shot-voice-cloning-2408.15916"/></url>
<url><loc>https://scifaro.com/en/abs/svdd-2024-the-inaugural-singing-voice-deepfake-detection-challenge-2408.16132</loc><lastmod>2026-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/svdd-2024-the-inaugural-singing-voice-deepfake-detection-challenge-2408.16132"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/svdd-2024-the-inaugural-singing-voice-deepfake-detection-challenge-2408.16132"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-japanese-speech-recognition-on-asr-llm-setups-with-multi-pass-augmented-generative-error-correction-2408.16180</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-japanese-speech-recognition-on-asr-llm-setups-with-multi-pass-augmented-generative-error-correction-2408.16180"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-japanese-speech-recognition-on-asr-llm-setups-with-multi-pass-augmented-generative-error-correction-2408.16180"/></url>
<url><loc>https://scifaro.com/en/abs/ssdm-scalable-speech-dysfluency-modeling-2408.16221</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssdm-scalable-speech-dysfluency-modeling-2408.16221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssdm-scalable-speech-dysfluency-modeling-2408.16221"/></url>
<url><loc>https://scifaro.com/en/abs/denoising-of-photogrammetric-dummy-head-ear-point-clouds-for-individual-head-related-transfer-functions-computation-2408.16410</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/denoising-of-photogrammetric-dummy-head-ear-point-clouds-for-individual-head-related-transfer-functions-computation-2408.16410"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/denoising-of-photogrammetric-dummy-head-ear-point-clouds-for-individual-head-related-transfer-functions-computation-2408.16410"/></url>
<url><loc>https://scifaro.com/en/abs/whisma-a-speech-llm-to-perform-zero-shot-spoken-language-understanding-2408.16423</loc><lastmod>2024-08-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisma-a-speech-llm-to-perform-zero-shot-spoken-language-understanding-2408.16423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisma-a-speech-llm-to-perform-zero-shot-spoken-language-understanding-2408.16423"/></url>
<url><loc>https://scifaro.com/en/abs/wavtokenizer-an-efficient-acoustic-discrete-codec-tokenizer-for-audio-language-modeling-2408.16532</loc><lastmod>2025-02-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavtokenizer-an-efficient-acoustic-discrete-codec-tokenizer-for-audio-language-modeling-2408.16532"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavtokenizer-an-efficient-acoustic-discrete-codec-tokenizer-for-audio-language-modeling-2408.16532"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-voice-synthesis-through-human-in-the-loop-coordinate-descent-2408.17068</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-voice-synthesis-through-human-in-the-loop-coordinate-descent-2408.17068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-voice-synthesis-through-human-in-the-loop-coordinate-descent-2408.17068"/></url>
<url><loc>https://scifaro.com/en/abs/recursive-attentive-pooling-for-extracting-speaker-embeddings-from-multi-speaker-recordings-2408.17142</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recursive-attentive-pooling-for-extracting-speaker-embeddings-from-multi-speaker-recordings-2408.17142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recursive-attentive-pooling-for-extracting-speaker-embeddings-from-multi-speaker-recordings-2408.17142"/></url>
<url><loc>https://scifaro.com/en/abs/learning-multi-target-tdoa-features-for-sound-event-localization-and-detection-2408.17166</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-multi-target-tdoa-features-for-sound-event-localization-and-detection-2408.17166"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-multi-target-tdoa-features-for-sound-event-localization-and-detection-2408.17166"/></url>
<url><loc>https://scifaro.com/en/abs/codec-does-matter-exploring-the-semantic-shortcoming-of-codec-for-audio-language-model-2408.17175</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codec-does-matter-exploring-the-semantic-shortcoming-of-codec-for-audio-language-model-2408.17175"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codec-does-matter-exploring-the-semantic-shortcoming-of-codec-for-audio-language-model-2408.17175"/></url>
<url><loc>https://scifaro.com/en/abs/advancing-multi-talker-asr-performance-with-large-language-models-2408.17431</loc><lastmod>2024-09-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advancing-multi-talker-asr-performance-with-large-language-models-2408.17431"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advancing-multi-talker-asr-performance-with-large-language-models-2408.17431"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-for-unseen-speakers-via-low-complexity-discrete-unit-based-frame-selection-2408.17432</loc><lastmod>2025-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-for-unseen-speakers-via-low-complexity-discrete-unit-based-frame-selection-2408.17432"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-for-unseen-speakers-via-low-complexity-discrete-unit-based-frame-selection-2408.17432"/></url>
<url><loc>https://scifaro.com/en/abs/progressive-residual-extraction-based-pre-training-for-speech-representation-learning-2409.00387</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/progressive-residual-extraction-based-pre-training-for-speech-representation-learning-2409.00387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/progressive-residual-extraction-based-pre-training-for-speech-representation-learning-2409.00387"/></url>
<url><loc>https://scifaro.com/en/abs/dcim-avsr-efficient-audio-visual-speech-recognition-via-dual-conformer-interaction-module-2409.00481</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcim-avsr-efficient-audio-visual-speech-recognition-via-dual-conformer-interaction-module-2409.00481"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcim-avsr-efficient-audio-visual-speech-recognition-via-dual-conformer-interaction-module-2409.00481"/></url>
<url><loc>https://scifaro.com/en/abs/digit-recognition-using-multimodal-spiking-neural-networks-2409.00552</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/digit-recognition-using-multimodal-spiking-neural-networks-2409.00552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/digit-recognition-using-multimodal-spiking-neural-networks-2409.00552"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-analysis-of-modality-fusion-approaches-for-audio-visual-person-identification-and-verification-2409.00562</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-analysis-of-modality-fusion-approaches-for-audio-visual-person-identification-and-verification-2409.00562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-analysis-of-modality-fusion-approaches-for-audio-visual-person-identification-and-verification-2409.00562"/></url>
<url><loc>https://scifaro.com/en/abs/expanding-on-enclap-with-auxiliary-retrieval-model-for-automated-audio-captioning-2409.01160</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expanding-on-enclap-with-auxiliary-retrieval-model-for-automated-audio-captioning-2409.01160"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expanding-on-enclap-with-auxiliary-retrieval-model-for-automated-audio-captioning-2409.01160"/></url>
<url><loc>https://scifaro.com/en/abs/enclap-analyzing-the-enclap-framework-for-optimizing-automated-audio-captioning-performance-2409.01201</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enclap-analyzing-the-enclap-framework-for-optimizing-automated-audio-captioning-performance-2409.01201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enclap-analyzing-the-enclap-framework-for-optimizing-automated-audio-captioning-performance-2409.01201"/></url>
<url><loc>https://scifaro.com/en/abs/suppressing-noise-disparity-in-training-data-for-automatic-pathological-speech-detection-2409.01209</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/suppressing-noise-disparity-in-training-data-for-automatic-pathological-speech-detection-2409.01209"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/suppressing-noise-disparity-in-training-data-for-automatic-pathological-speech-detection-2409.01209"/></url>
<url><loc>https://scifaro.com/en/abs/resource-efficient-adaptation-of-speech-foundation-models-for-multi-speaker-asr-2409.01438</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resource-efficient-adaptation-of-speech-foundation-models-for-multi-speaker-asr-2409.01438"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resource-efficient-adaptation-of-speech-foundation-models-for-multi-speaker-asr-2409.01438"/></url>
<url><loc>https://scifaro.com/en/abs/steered-response-power-based-direction-of-arrival-estimation-exploiting-an-auxiliary-microphone-2409.01776</loc><lastmod>2024-09-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/steered-response-power-based-direction-of-arrival-estimation-exploiting-an-auxiliary-microphone-2409.01776"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/steered-response-power-based-direction-of-arrival-estimation-exploiting-an-auxiliary-microphone-2409.01776"/></url>
<url><loc>https://scifaro.com/en/abs/comparative-study-on-noise-augmented-training-and-its-effect-on-adversarial-robustness-in-asr-systems-2409.01813</loc><lastmod>2025-11-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparative-study-on-noise-augmented-training-and-its-effect-on-adversarial-robustness-in-asr-systems-2409.01813"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparative-study-on-noise-augmented-training-and-its-effect-on-adversarial-robustness-in-asr-systems-2409.01813"/></url>
<url><loc>https://scifaro.com/en/abs/vec2wav-2-0-advancing-voice-conversion-via-discrete-token-vocoders-2409.01995</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vec2wav-2-0-advancing-voice-conversion-via-discrete-token-vocoders-2409.01995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vec2wav-2-0-advancing-voice-conversion-via-discrete-token-vocoders-2409.01995"/></url>
<url><loc>https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-notsofar-1-challenge-2409.02041</loc><lastmod>2024-10-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-notsofar-1-challenge-2409.02041"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-notsofar-1-challenge-2409.02041"/></url>
<url><loc>https://scifaro.com/en/abs/speech-foundation-model-ensembles-for-the-controlled-singing-voice-deepfake-detection-ctrsvdd-challenge-2024-2409.02302</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-foundation-model-ensembles-for-the-controlled-singing-voice-deepfake-detection-ctrsvdd-challenge-2024-2409.02302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-foundation-model-ensembles-for-the-controlled-singing-voice-deepfake-detection-ctrsvdd-challenge-2024-2409.02302"/></url>
<url><loc>https://scifaro.com/en/abs/fast-high-quality-and-parameter-efficient-articulatory-synthesis-using-differentiable-dsp-2409.02451</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-high-quality-and-parameter-efficient-articulatory-synthesis-using-differentiable-dsp-2409.02451"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-high-quality-and-parameter-efficient-articulatory-synthesis-using-differentiable-dsp-2409.02451"/></url>
<url><loc>https://scifaro.com/en/abs/cuempathy-a-counseling-speech-dataset-for-psychotherapy-research-2409.02466</loc><lastmod>2024-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cuempathy-a-counseling-speech-dataset-for-psychotherapy-research-2409.02466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cuempathy-a-counseling-speech-dataset-for-psychotherapy-research-2409.02466"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-extraction-of-noise-robust-discrete-units-from-self-supervised-speech-models-2409.02565</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-extraction-of-noise-robust-discrete-units-from-self-supervised-speech-models-2409.02565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-extraction-of-noise-robust-discrete-units-from-self-supervised-speech-models-2409.02565"/></url>
<url><loc>https://scifaro.com/en/abs/usef-tse-universal-speaker-embedding-free-target-speaker-extraction-2409.02615</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usef-tse-universal-speaker-embedding-free-target-speaker-extraction-2409.02615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usef-tse-universal-speaker-embedding-free-target-speaker-extraction-2409.02615"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-audio-signal-enhancement-a-multi-output-mvdr-method-in-the-spherical-harmonic-domain-2409.03269</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-audio-signal-enhancement-a-multi-output-mvdr-method-in-the-spherical-harmonic-domain-2409.03269"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-audio-signal-enhancement-a-multi-output-mvdr-method-in-the-spherical-harmonic-domain-2409.03269"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-and-style-disentanglement-of-speech-based-on-contrastive-predictive-coding-supported-factorized-variational-autoencoder-2409.03520</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-and-style-disentanglement-of-speech-based-on-contrastive-predictive-coding-supported-factorized-variational-autoencoder-2409.03520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-and-style-disentanglement-of-speech-based-on-contrastive-predictive-coding-supported-factorized-variational-autoencoder-2409.03520"/></url>
<url><loc>https://scifaro.com/en/abs/a-dual-path-framework-with-frequency-and-time-excited-network-for-anomalous-sound-detection-2409.03610</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dual-path-framework-with-frequency-and-time-excited-network-for-anomalous-sound-detection-2409.03610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dual-path-framework-with-frequency-and-time-excited-network-for-anomalous-sound-detection-2409.03610"/></url>
<url><loc>https://scifaro.com/en/abs/zsdevc-zero-shot-diffusion-based-emotional-voice-conversion-with-disentangled-mechanism-2409.03636</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zsdevc-zero-shot-diffusion-based-emotional-voice-conversion-with-disentangled-mechanism-2409.03636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zsdevc-zero-shot-diffusion-based-emotional-voice-conversion-with-disentangled-mechanism-2409.03636"/></url>
<url><loc>https://scifaro.com/en/abs/privacy-versus-emotion-preservation-trade-offs-in-emotion-preserving-speaker-anonymization-2409.03655</loc><lastmod>2024-09-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/privacy-versus-emotion-preservation-trade-offs-in-emotion-preserving-speaker-anonymization-2409.03655"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/privacy-versus-emotion-preservation-trade-offs-in-emotion-preserving-speaker-anonymization-2409.03655"/></url>
<url><loc>https://scifaro.com/en/abs/development-of-the-listening-in-spatialized-noise-sentences-lisn-s-test-in-brazilian-portuguese-presentation-software-speech-stimuli-and-sentence-equivalence-2409.04014</loc><lastmod>2024-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/development-of-the-listening-in-spatialized-noise-sentences-lisn-s-test-in-brazilian-portuguese-presentation-software-speech-stimuli-and-sentence-equivalence-2409.04014"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/development-of-the-listening-in-spatialized-noise-sentences-lisn-s-test-in-brazilian-portuguese-presentation-software-speech-stimuli-and-sentence-equivalence-2409.04014"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-own-voice-reconstruction-for-hearables-with-an-in-ear-microphone-2409.04136</loc><lastmod>2025-08-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-own-voice-reconstruction-for-hearables-with-an-in-ear-microphone-2409.04136"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-own-voice-reconstruction-for-hearables-with-an-in-ear-microphone-2409.04136"/></url>
<url><loc>https://scifaro.com/en/abs/npu-ntu-system-for-voice-privacy-2024-challenge-2409.04173</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/npu-ntu-system-for-voice-privacy-2024-challenge-2409.04173"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/npu-ntu-system-for-voice-privacy-2024-challenge-2409.04173"/></url>
<url><loc>https://scifaro.com/en/abs/cross-attention-inspired-selective-state-space-models-for-target-sound-extraction-2409.04803</loc><lastmod>2025-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-attention-inspired-selective-state-space-models-for-target-sound-extraction-2409.04803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-attention-inspired-selective-state-space-models-for-target-sound-extraction-2409.04803"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-sound-source-trajectories-for-universal-sound-separation-2409.04843</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-sound-source-trajectories-for-universal-sound-separation-2409.04843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-sound-source-trajectories-for-universal-sound-separation-2409.04843"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-wavlm-back-ends-for-speech-spoofing-and-deepfake-detection-2409.05032</loc><lastmod>2025-06-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-wavlm-back-ends-for-speech-spoofing-and-deepfake-detection-2409.05032"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-wavlm-back-ends-for-speech-spoofing-and-deepfake-detection-2409.05032"/></url>
<url><loc>https://scifaro.com/en/abs/tf-mamba-a-time-frequency-network-for-sound-source-localization-2409.05034</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tf-mamba-a-time-frequency-network-for-sound-source-localization-2409.05034"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tf-mamba-a-time-frequency-network-for-sound-source-localization-2409.05034"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-schr-odinger-bridge-and-symmetric-noise-schedule-2409.05116</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-schr-odinger-bridge-and-symmetric-noise-schedule-2409.05116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-speech-enhancement-with-schr-odinger-bridge-and-symmetric-noise-schedule-2409.05116"/></url>
<url><loc>https://scifaro.com/en/abs/ss-brpe-self-supervised-blind-room-parameter-estimation-using-attention-mechanisms-2409.05212</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ss-brpe-self-supervised-blind-room-parameter-estimation-using-attention-mechanisms-2409.05212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ss-brpe-self-supervised-blind-room-parameter-estimation-using-attention-mechanisms-2409.05212"/></url>
<url><loc>https://scifaro.com/en/abs/bigcodec-pushing-the-limits-of-low-bitrate-neural-speech-codec-2409.05377</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bigcodec-pushing-the-limits-of-low-bitrate-neural-speech-codec-2409.05377"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bigcodec-pushing-the-limits-of-low-bitrate-neural-speech-codec-2409.05377"/></url>
<url><loc>https://scifaro.com/en/abs/findings-of-the-2024-mandarin-stuttering-event-detection-and-automatic-speech-recognition-challenge-2409.05430</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/findings-of-the-2024-mandarin-stuttering-event-detection-and-automatic-speech-recognition-challenge-2409.05430"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/findings-of-the-2024-mandarin-stuttering-event-detection-and-automatic-speech-recognition-challenge-2409.05430"/></url>
<url><loc>https://scifaro.com/en/abs/ntt-multi-speaker-asr-system-for-the-dasr-task-of-chime-8-challenge-2409.05554</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ntt-multi-speaker-asr-system-for-the-dasr-task-of-chime-8-challenge-2409.05554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ntt-multi-speaker-asr-system-for-the-dasr-task-of-chime-8-challenge-2409.05554"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-content-and-acoustic-representations-for-speech-emotion-recognition-2409.05566</loc><lastmod>2025-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-content-and-acoustic-representations-for-speech-emotion-recognition-2409.05566"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-content-and-acoustic-representations-for-speech-emotion-recognition-2409.05566"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-modularity-for-noise-robustness-in-conformer-based-asr-2409.05589</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-modularity-for-noise-robustness-in-conformer-based-asr-2409.05589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-modularity-for-noise-robustness-in-conformer-based-asr-2409.05589"/></url>
<url><loc>https://scifaro.com/en/abs/longer-is-not-necessarily-stronger-punctuated-long-sequence-training-for-enhanced-speech-recognition-and-translation-2409.05601</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/longer-is-not-necessarily-stronger-punctuated-long-sequence-training-for-enhanced-speech-recognition-and-translation-2409.05601"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/longer-is-not-necessarily-stronger-punctuated-long-sequence-training-for-enhanced-speech-recognition-and-translation-2409.05601"/></url>
<url><loc>https://scifaro.com/en/abs/as-speech-adaptive-style-for-speech-synthesis-2409.05730</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/as-speech-adaptive-style-for-speech-synthesis-2409.05730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/as-speech-adaptive-style-for-speech-synthesis-2409.05730"/></url>
<url><loc>https://scifaro.com/en/abs/a-toolkit-for-joint-speaker-diarization-and-identification-with-application-to-speaker-attributed-asr-2409.05750</loc><lastmod>2024-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-toolkit-for-joint-speaker-diarization-and-identification-with-application-to-speaker-attributed-asr-2409.05750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-toolkit-for-joint-speaker-diarization-and-identification-with-application-to-speaker-attributed-asr-2409.05750"/></url>
<url><loc>https://scifaro.com/en/abs/property-neurons-in-self-supervised-speech-transformers-2409.05910</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/property-neurons-in-self-supervised-speech-transformers-2409.05910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/property-neurons-in-self-supervised-speech-transformers-2409.05910"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-correction-of-named-entity-speech-recognition-errors-2409.06062</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-correction-of-named-entity-speech-recognition-errors-2409.06062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-correction-of-named-entity-speech-recognition-errors-2409.06062"/></url>
<url><loc>https://scifaro.com/en/abs/estimating-the-completeness-of-discrete-speech-units-2409.06109</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimating-the-completeness-of-discrete-speech-units-2409.06109"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimating-the-completeness-of-discrete-speech-units-2409.06109"/></url>
<url><loc>https://scifaro.com/en/abs/vc-enhance-speech-restoration-with-integrated-noise-suppression-and-voice-conversion-2409.06126</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vc-enhance-speech-restoration-with-integrated-noise-suppression-and-voice-conversion-2409.06126"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vc-enhance-speech-restoration-with-integrated-noise-suppression-and-voice-conversion-2409.06126"/></url>
<url><loc>https://scifaro.com/en/abs/dewinder-single-channel-wind-noise-reduction-using-ultrasound-sensing-2409.06137</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dewinder-single-channel-wind-noise-reduction-using-ultrasound-sensing-2409.06137"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dewinder-single-channel-wind-noise-reduction-using-ultrasound-sensing-2409.06137"/></url>
<url><loc>https://scifaro.com/en/abs/multi-source-music-generation-with-latent-diffusion-2409.06190</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-source-music-generation-with-latent-diffusion-2409.06190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-source-music-generation-with-latent-diffusion-2409.06190"/></url>
<url><loc>https://scifaro.com/en/abs/spoofing-aware-speaker-verification-robust-against-domain-and-channel-mismatches-2409.06327</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-robust-against-domain-and-channel-mismatches-2409.06327"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spoofing-aware-speaker-verification-robust-against-domain-and-channel-mismatches-2409.06327"/></url>
<url><loc>https://scifaro.com/en/abs/instructsing-high-fidelity-singing-voice-generation-via-instructing-yourself-2409.06330</loc><lastmod>2024-09-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/instructsing-high-fidelity-singing-voice-generation-via-instructing-yourself-2409.06330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/instructsing-high-fidelity-singing-voice-generation-via-instructing-yourself-2409.06330"/></url>
<url><loc>https://scifaro.com/en/abs/janssen-2-0-audio-inpainting-in-the-time-frequency-domain-2409.06392</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/janssen-2-0-audio-inpainting-in-the-time-frequency-domain-2409.06392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/janssen-2-0-audio-inpainting-in-the-time-frequency-domain-2409.06392"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-differences-between-human-perception-and-model-inference-in-audio-event-recognition-2409.06580</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-differences-between-human-perception-and-model-inference-in-audio-event-recognition-2409.06580"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-differences-between-human-perception-and-model-inference-in-audio-event-recognition-2409.06580"/></url>
<url><loc>https://scifaro.com/en/abs/sortformer-a-novel-approach-for-permutation-resolved-speaker-supervision-in-speech-to-text-systems-2409.06656</loc><lastmod>2025-07-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sortformer-a-novel-approach-for-permutation-resolved-speaker-supervision-in-speech-to-text-systems-2409.06656"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sortformer-a-novel-approach-for-permutation-resolved-speaker-supervision-in-speech-to-text-systems-2409.06656"/></url>
<url><loc>https://scifaro.com/en/abs/neural-ambisonic-encoding-for-multi-speaker-scenarios-using-a-circular-microphone-array-2409.06954</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-ambisonic-encoding-for-multi-speaker-scenarios-using-a-circular-microphone-array-2409.06954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-ambisonic-encoding-for-multi-speaker-scenarios-using-a-circular-microphone-array-2409.06954"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-text-to-speech-as-golden-speech-generator-a-systematic-framework-and-its-applicability-in-automatic-pronunciation-assessment-2409.07151</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-as-golden-speech-generator-a-systematic-framework-and-its-applicability-in-automatic-pronunciation-assessment-2409.07151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-as-golden-speech-generator-a-systematic-framework-and-its-applicability-in-automatic-pronunciation-assessment-2409.07151"/></url>
<url><loc>https://scifaro.com/en/abs/rethinking-mamba-in-speech-processing-by-self-supervised-models-2409.07273</loc><lastmod>2024-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rethinking-mamba-in-speech-processing-by-self-supervised-models-2409.07273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rethinking-mamba-in-speech-processing-by-self-supervised-models-2409.07273"/></url>
<url><loc>https://scifaro.com/en/abs/ssr-speech-towards-stable-safe-and-robust-zero-shot-text-based-speech-editing-and-synthesis-2409.07556</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssr-speech-towards-stable-safe-and-robust-zero-shot-text-based-speech-editing-and-synthesis-2409.07556"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssr-speech-towards-stable-safe-and-robust-zero-shot-text-based-speech-editing-and-synthesis-2409.07556"/></url>
<url><loc>https://scifaro.com/en/abs/super-monotonic-alignment-search-2409.07704</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/super-monotonic-alignment-search-2409.07704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/super-monotonic-alignment-search-2409.07704"/></url>
<url><loc>https://scifaro.com/en/abs/music-auto-tagging-in-the-long-tail-a-few-shot-approach-2409.07730</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-auto-tagging-in-the-long-tail-a-few-shot-approach-2409.07730"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-auto-tagging-in-the-long-tail-a-few-shot-approach-2409.07730"/></url>
<url><loc>https://scifaro.com/en/abs/layer-aware-tdnn-speaker-recognition-using-multi-layer-features-from-pre-trained-models-2409.07770</loc><lastmod>2025-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/layer-aware-tdnn-speaker-recognition-using-multi-layer-features-from-pre-trained-models-2409.07770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/layer-aware-tdnn-speaker-recognition-using-multi-layer-features-from-pre-trained-models-2409.07770"/></url>
<url><loc>https://scifaro.com/en/abs/audio-decoding-by-inverse-problem-solving-2409.07858</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-decoding-by-inverse-problem-solving-2409.07858"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-decoding-by-inverse-problem-solving-2409.07858"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-and-defending-against-adversarial-attacks-on-automatic-speech-recognition-via-diffusion-models-2409.07936</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-and-defending-against-adversarial-attacks-on-automatic-speech-recognition-via-diffusion-models-2409.07936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-and-defending-against-adversarial-attacks-on-automatic-speech-recognition-via-diffusion-models-2409.07936"/></url>
<url><loc>https://scifaro.com/en/abs/auto-landmark-acoustic-landmark-dataset-and-open-source-toolkit-for-landmark-extraction-2409.07969</loc><lastmod>2025-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/auto-landmark-acoustic-landmark-dataset-and-open-source-toolkit-for-landmark-extraction-2409.07969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/auto-landmark-acoustic-landmark-dataset-and-open-source-toolkit-for-landmark-extraction-2409.07969"/></url>
<url><loc>https://scifaro.com/en/abs/faster-speech-llama-inference-with-multi-token-prediction-2409.08148</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/faster-speech-llama-inference-with-multi-token-prediction-2409.08148"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/faster-speech-llama-inference-with-multi-token-prediction-2409.08148"/></url>
<url><loc>https://scifaro.com/en/abs/dark-experience-for-incremental-keyword-spotting-2409.08153</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dark-experience-for-incremental-keyword-spotting-2409.08153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dark-experience-for-incremental-keyword-spotting-2409.08153"/></url>
<url><loc>https://scifaro.com/en/abs/hierarchical-symbolic-pop-music-generation-with-graph-neural-networks-2409.08155</loc><lastmod>2024-09-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hierarchical-symbolic-pop-music-generation-with-graph-neural-networks-2409.08155"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hierarchical-symbolic-pop-music-generation-with-graph-neural-networks-2409.08155"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-sparse-coding-with-the-adaptive-locally-competitive-algorithm-for-speech-classification-2409.08188</loc><lastmod>2025-09-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-sparse-coding-with-the-adaptive-locally-competitive-algorithm-for-speech-classification-2409.08188"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-sparse-coding-with-the-adaptive-locally-competitive-algorithm-for-speech-classification-2409.08188"/></url>
<url><loc>https://scifaro.com/en/abs/detection-of-electric-motor-damage-through-analysis-of-sound-signals-using-bayesian-neural-networks-2409.08309</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detection-of-electric-motor-damage-through-analysis-of-sound-signals-using-bayesian-neural-networks-2409.08309"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detection-of-electric-motor-damage-through-analysis-of-sound-signals-using-bayesian-neural-networks-2409.08309"/></url>
<url><loc>https://scifaro.com/en/abs/towards-quantifying-and-reducing-language-mismatch-effects-in-cross-lingual-speech-anti-spoofing-2409.08346</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-quantifying-and-reducing-language-mismatch-effects-in-cross-lingual-speech-anti-spoofing-2409.08346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-quantifying-and-reducing-language-mismatch-effects-in-cross-lingual-speech-anti-spoofing-2409.08346"/></url>
<url><loc>https://scifaro.com/en/abs/openace-an-open-benchmark-for-evaluating-audio-coding-performance-2409.08374</loc><lastmod>2025-08-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/openace-an-open-benchmark-for-evaluating-audio-coding-performance-2409.08374"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/openace-an-open-benchmark-for-evaluating-audio-coding-performance-2409.08374"/></url>
<url><loc>https://scifaro.com/en/abs/soloaudio-target-sound-extraction-with-language-oriented-audio-diffusion-transformer-2409.08425</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soloaudio-target-sound-extraction-with-language-oriented-audio-diffusion-transformer-2409.08425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soloaudio-target-sound-extraction-with-language-oriented-audio-diffusion-transformer-2409.08425"/></url>
<url><loc>https://scifaro.com/en/abs/unified-audio-event-detection-2409.08552</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-audio-event-detection-2409.08552"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-audio-event-detection-2409.08552"/></url>
<url><loc>https://scifaro.com/en/abs/frequency-tracking-features-for-data-efficient-deep-siren-identification-2409.08587</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frequency-tracking-features-for-data-efficient-deep-siren-identification-2409.08587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frequency-tracking-features-for-data-efficient-deep-siren-identification-2409.08587"/></url>
<url><loc>https://scifaro.com/en/abs/effective-integration-of-kan-for-keyword-spotting-2409.08605</loc><lastmod>2025-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-integration-of-kan-for-keyword-spotting-2409.08605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-integration-of-kan-for-keyword-spotting-2409.08605"/></url>
<url><loc>https://scifaro.com/en/abs/dualsep-a-light-weight-dual-encoder-convolutional-recurrent-network-for-real-time-in-car-speech-separation-2409.08610</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dualsep-a-light-weight-dual-encoder-convolutional-recurrent-network-for-real-time-in-car-speech-separation-2409.08610"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dualsep-a-light-weight-dual-encoder-convolutional-recurrent-network-for-real-time-in-car-speech-separation-2409.08610"/></url>
<url><loc>https://scifaro.com/en/abs/nest-rq-next-token-prediction-for-speech-self-supervised-pre-training-2409.08680</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nest-rq-next-token-prediction-for-speech-self-supervised-pre-training-2409.08680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nest-rq-next-token-prediction-for-speech-self-supervised-pre-training-2409.08680"/></url>
<url><loc>https://scifaro.com/en/abs/a-dual-branch-parallel-network-for-speech-enhancement-and-restoration-2409.08702</loc><lastmod>2026-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-dual-branch-parallel-network-for-speech-enhancement-and-restoration-2409.08702"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-dual-branch-parallel-network-for-speech-enhancement-and-restoration-2409.08702"/></url>
<url><loc>https://scifaro.com/en/abs/text-to-speech-synthesis-in-the-wild-2409.08711</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-to-speech-synthesis-in-the-wild-2409.08711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-to-speech-synthesis-in-the-wild-2409.08711"/></url>
<url><loc>https://scifaro.com/en/abs/flamo-an-open-source-library-for-frequency-domain-differentiable-audio-processing-2409.08723</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flamo-an-open-source-library-for-frequency-domain-differentiable-audio-processing-2409.08723"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flamo-an-open-source-library-for-frequency-domain-differentiable-audio-processing-2409.08723"/></url>
<url><loc>https://scifaro.com/en/abs/llaqo-towards-a-query-based-coach-in-expressive-music-performance-assessment-2409.08795</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llaqo-towards-a-query-based-coach-in-expressive-music-performance-assessment-2409.08795"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llaqo-towards-a-query-based-coach-in-expressive-music-performance-assessment-2409.08795"/></url>
<url><loc>https://scifaro.com/en/abs/data-efficient-child-adult-speaker-diarization-with-simulated-conversations-2409.08881</loc><lastmod>2025-06-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-efficient-child-adult-speaker-diarization-with-simulated-conversations-2409.08881"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-efficient-child-adult-speaker-diarization-with-simulated-conversations-2409.08881"/></url>
<url><loc>https://scifaro.com/en/abs/hltcoe-jhu-submission-to-the-voice-privacy-challenge-2024-2409.08913</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hltcoe-jhu-submission-to-the-voice-privacy-challenge-2024-2409.08913"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hltcoe-jhu-submission-to-the-voice-privacy-challenge-2024-2409.08913"/></url>
<url><loc>https://scifaro.com/en/abs/why-some-audio-signal-short-time-fourier-transform-coefficients-have-nonuniform-phase-distributions-2409.08981</loc><lastmod>2024-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-some-audio-signal-short-time-fourier-transform-coefficients-have-nonuniform-phase-distributions-2409.08981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-some-audio-signal-short-time-fourier-transform-coefficients-have-nonuniform-phase-distributions-2409.08981"/></url>
<url><loc>https://scifaro.com/en/abs/slick-exploiting-subsequences-for-length-constrained-keyword-spotting-2409.09067</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slick-exploiting-subsequences-for-length-constrained-keyword-spotting-2409.09067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slick-exploiting-subsequences-for-length-constrained-keyword-spotting-2409.09067"/></url>
<url><loc>https://scifaro.com/en/abs/mambafoley-foley-sound-generation-using-selective-state-space-models-2409.09162</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mambafoley-foley-sound-generation-using-selective-state-space-models-2409.09162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mambafoley-foley-sound-generation-using-selective-state-space-models-2409.09162"/></url>
<url><loc>https://scifaro.com/en/abs/learnings-from-curating-a-trustworthy-well-annotated-and-useful-dataset-of-disordered-english-speech-2409.09190</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learnings-from-curating-a-trustworthy-well-annotated-and-useful-dataset-of-disordered-english-speech-2409.09190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learnings-from-curating-a-trustworthy-well-annotated-and-useful-dataset-of-disordered-english-speech-2409.09190"/></url>
<url><loc>https://scifaro.com/en/abs/reclap-improving-zero-shot-audio-classification-by-describing-sounds-2409.09213</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reclap-improving-zero-shot-audio-classification-by-describing-sounds-2409.09213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reclap-improving-zero-shot-audio-classification-by-describing-sounds-2409.09213"/></url>
<url><loc>https://scifaro.com/en/abs/improving-robustness-of-diffusion-based-zero-shot-speech-synthesis-via-stable-formant-generation-2409.09311</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-robustness-of-diffusion-based-zero-shot-speech-synthesis-via-stable-formant-generation-2409.09311"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-robustness-of-diffusion-based-zero-shot-speech-synthesis-via-stable-formant-generation-2409.09311"/></url>
<url><loc>https://scifaro.com/en/abs/improvements-of-discriminative-feature-space-training-for-anomalous-sound-detection-in-unlabeled-conditions-2409.09332</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improvements-of-discriminative-feature-space-training-for-anomalous-sound-detection-in-unlabeled-conditions-2409.09332"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improvements-of-discriminative-feature-space-training-for-anomalous-sound-detection-in-unlabeled-conditions-2409.09332"/></url>
<url><loc>https://scifaro.com/en/abs/wave-u-mamba-an-end-to-end-framework-for-high-quality-and-efficient-speech-super-resolution-2409.09337</loc><lastmod>2025-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wave-u-mamba-an-end-to-end-framework-for-high-quality-and-efficient-speech-super-resolution-2409.09337"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wave-u-mamba-an-end-to-end-framework-for-high-quality-and-efficient-speech-super-resolution-2409.09337"/></url>
<url><loc>https://scifaro.com/en/abs/e1-tts-simple-and-fast-non-autoregressive-tts-2409.09351</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/e1-tts-simple-and-fast-non-autoregressive-tts-2409.09351"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/e1-tts-simple-and-fast-non-autoregressive-tts-2409.09351"/></url>
<url><loc>https://scifaro.com/en/abs/text-prompt-is-not-enough-sound-event-enhanced-prompt-adapter-for-target-style-audio-generation-2409.09381</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-prompt-is-not-enough-sound-event-enhanced-prompt-adapter-for-target-style-audio-generation-2409.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-prompt-is-not-enough-sound-event-enhanced-prompt-adapter-for-target-style-audio-generation-2409.09381"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-multi-level-knowledge-distillation-for-enhanced-speaker-verification-2409.09389</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-multi-level-knowledge-distillation-for-enhanced-speaker-verification-2409.09389"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-multi-level-knowledge-distillation-for-enhanced-speaker-verification-2409.09389"/></url>
<url><loc>https://scifaro.com/en/abs/channel-adaptation-for-speaker-verification-using-optimal-transport-with-pseudo-label-2409.09396</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/channel-adaptation-for-speaker-verification-using-optimal-transport-with-pseudo-label-2409.09396"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/channel-adaptation-for-speaker-verification-using-optimal-transport-with-pseudo-label-2409.09396"/></url>
<url><loc>https://scifaro.com/en/abs/language-queried-target-sound-extraction-without-parallel-training-data-2409.09398</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-queried-target-sound-extraction-without-parallel-training-data-2409.09398"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-queried-target-sound-extraction-without-parallel-training-data-2409.09398"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-self-supervised-learning-for-speaker-diarization-2409.09408</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-self-supervised-learning-for-speaker-diarization-2409.09408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-self-supervised-learning-for-speaker-diarization-2409.09408"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-asr-with-whisper-2409.09543</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-asr-with-whisper-2409.09543"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-asr-with-whisper-2409.09543"/></url>
<url><loc>https://scifaro.com/en/abs/effective-pre-training-of-audio-transformers-for-sound-event-detection-2409.09546</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/effective-pre-training-of-audio-transformers-for-sound-event-detection-2409.09546"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/effective-pre-training-of-audio-transformers-for-sound-event-detection-2409.09546"/></url>
<url><loc>https://scifaro.com/en/abs/stutter-solver-end-to-end-multi-lingual-dysfluency-detection-2409.09621</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stutter-solver-end-to-end-multi-lingual-dysfluency-detection-2409.09621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stutter-solver-end-to-end-multi-lingual-dysfluency-detection-2409.09621"/></url>
<url><loc>https://scifaro.com/en/abs/extract-and-diffuse-latent-integration-for-improved-diffusion-based-speech-and-vocal-enhancement-2409.09642</loc><lastmod>2025-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/extract-and-diffuse-latent-integration-for-improved-diffusion-based-speech-and-vocal-enhancement-2409.09642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/extract-and-diffuse-latent-integration-for-improved-diffusion-based-speech-and-vocal-enhancement-2409.09642"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-multimodal-speech-representations-for-the-assessment-of-schizophrenia-symptoms-2409.09733</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-multimodal-speech-representations-for-the-assessment-of-schizophrenia-symptoms-2409.09733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-multimodal-speech-representations-for-the-assessment-of-schizophrenia-symptoms-2409.09733"/></url>
<url><loc>https://scifaro.com/en/abs/a-study-on-zero-shot-non-intrusive-speech-assessment-using-large-language-models-2409.09914</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-study-on-zero-shot-non-intrusive-speech-assessment-using-large-language-models-2409.09914"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-study-on-zero-shot-non-intrusive-speech-assessment-using-large-language-models-2409.09914"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-ensemble-singing-voice-synthesis-with-interactions-between-singers-2409.09988</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-ensemble-singing-voice-synthesis-with-interactions-between-singers-2409.09988"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-ensemble-singing-voice-synthesis-with-interactions-between-singers-2409.09988"/></url>
<url><loc>https://scifaro.com/en/abs/tbdm-net-bidirectional-dense-networks-with-gender-information-for-speech-emotion-recognition-2409.10056</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tbdm-net-bidirectional-dense-networks-with-gender-information-for-speech-emotion-recognition-2409.10056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tbdm-net-bidirectional-dense-networks-with-gender-information-for-speech-emotion-recognition-2409.10056"/></url>
<url><loc>https://scifaro.com/en/abs/styletts-zs-efficient-high-quality-zero-shot-text-to-speech-synthesis-with-distilled-time-varying-style-diffusion-2409.10058</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/styletts-zs-efficient-high-quality-zero-shot-text-to-speech-synthesis-with-distilled-time-varying-style-diffusion-2409.10058"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/styletts-zs-efficient-high-quality-zero-shot-text-to-speech-synthesis-with-distilled-time-varying-style-diffusion-2409.10058"/></url>
<url><loc>https://scifaro.com/en/abs/room-impulse-response-prototyping-using-receiver-distance-estimations-for-high-quality-room-equalisation-algorithms-2409.10131</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-impulse-response-prototyping-using-receiver-distance-estimations-for-high-quality-room-equalisation-algorithms-2409.10131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-impulse-response-prototyping-using-receiver-distance-estimations-for-high-quality-room-equalisation-algorithms-2409.10131"/></url>
<url><loc>https://scifaro.com/en/abs/emo-dpo-controllable-emotional-speech-synthesis-through-direct-preference-optimization-2409.10157</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emo-dpo-controllable-emotional-speech-synthesis-through-direct-preference-optimization-2409.10157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emo-dpo-controllable-emotional-speech-synthesis-through-direct-preference-optimization-2409.10157"/></url>
<url><loc>https://scifaro.com/en/abs/rf-gml-reference-free-generative-machine-listener-2409.10210</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rf-gml-reference-free-generative-machine-listener-2409.10210"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rf-gml-reference-free-generative-machine-listener-2409.10210"/></url>
<url><loc>https://scifaro.com/en/abs/speech-as-a-biomarker-for-disease-detection-2409.10230</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-as-a-biomarker-for-disease-detection-2409.10230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-as-a-biomarker-for-disease-detection-2409.10230"/></url>
<url><loc>https://scifaro.com/en/abs/obovox-far-field-speaker-recognition-a-novel-data-augmentation-approach-with-pretrained-models-2409.10240</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/obovox-far-field-speaker-recognition-a-novel-data-augmentation-approach-with-pretrained-models-2409.10240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/obovox-far-field-speaker-recognition-a-novel-data-augmentation-approach-with-pretrained-models-2409.10240"/></url>
<url><loc>https://scifaro.com/en/abs/ultra-low-latency-speech-enhancement-a-comprehensive-study-2409.10358</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ultra-low-latency-speech-enhancement-a-comprehensive-study-2409.10358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ultra-low-latency-speech-enhancement-a-comprehensive-study-2409.10358"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-joint-spectral-and-spatial-learning-with-mamba-for-multichannel-speech-enhancement-2409.10376</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-joint-spectral-and-spatial-learning-with-mamba-for-multichannel-speech-enhancement-2409.10376"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-joint-spectral-and-spatial-learning-with-mamba-for-multichannel-speech-enhancement-2409.10376"/></url>
<url><loc>https://scifaro.com/en/abs/smile-speech-meta-in-context-learning-for-low-resource-language-automatic-speech-recognition-2409.10429</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/smile-speech-meta-in-context-learning-for-low-resource-language-automatic-speech-recognition-2409.10429"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/smile-speech-meta-in-context-learning-for-low-resource-language-automatic-speech-recognition-2409.10429"/></url>
<url><loc>https://scifaro.com/en/abs/an-efficient-self-learning-framework-for-interactive-spoken-dialog-systems-2409.10515</loc><lastmod>2024-09-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-efficient-self-learning-framework-for-interactive-spoken-dialog-systems-2409.10515"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-efficient-self-learning-framework-for-interactive-spoken-dialog-systems-2409.10515"/></url>
<url><loc>https://scifaro.com/en/abs/a-real-time-platform-for-portable-and-scalable-active-noise-mitigation-for-construction-machinery-2409.10534</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-real-time-platform-for-portable-and-scalable-active-noise-mitigation-for-construction-machinery-2409.10534"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-real-time-platform-for-portable-and-scalable-active-noise-mitigation-for-construction-machinery-2409.10534"/></url>
<url><loc>https://scifaro.com/en/abs/fakemusiccaps-a-dataset-for-detection-and-attribution-of-synthetic-music-generated-via-text-to-music-models-2409.10684</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fakemusiccaps-a-dataset-for-detection-and-attribution-of-synthetic-music-generated-via-text-to-music-models-2409.10684"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fakemusiccaps-a-dataset-for-detection-and-attribution-of-synthetic-music-generated-via-text-to-music-models-2409.10684"/></url>
<url><loc>https://scifaro.com/en/abs/personalized-speech-emotion-recognition-in-human-robot-interaction-using-vision-transformers-2409.10687</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/personalized-speech-emotion-recognition-in-human-robot-interaction-using-vision-transformers-2409.10687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/personalized-speech-emotion-recognition-in-human-robot-interaction-using-vision-transformers-2409.10687"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-speech-models-for-word-level-stuttered-speech-detection-2409.10704</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-speech-models-for-word-level-stuttered-speech-detection-2409.10704"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-speech-models-for-word-level-stuttered-speech-detection-2409.10704"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-training-objectives-for-generative-speech-enhancement-2409.10753</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-training-objectives-for-generative-speech-enhancement-2409.10753"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-training-objectives-for-generative-speech-enhancement-2409.10753"/></url>
<url><loc>https://scifaro.com/en/abs/stimulus-modality-matters-impact-of-perceptual-evaluations-from-different-modalities-on-speech-emotion-recognition-system-performance-2409.10762</loc><lastmod>2025-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stimulus-modality-matters-impact-of-perceptual-evaluations-from-different-modalities-on-speech-emotion-recognition-system-performance-2409.10762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stimulus-modality-matters-impact-of-perceptual-evaluations-from-different-modalities-on-speech-emotion-recognition-system-performance-2409.10762"/></url>
<url><loc>https://scifaro.com/en/abs/towards-automatic-assessment-of-self-supervised-speech-models-using-rank-2409.10787</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-automatic-assessment-of-self-supervised-speech-models-using-rank-2409.10787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-automatic-assessment-of-self-supervised-speech-models-using-rank-2409.10787"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-prediction-targets-in-masked-pre-training-for-speech-foundation-models-2409.10788</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-prediction-targets-in-masked-pre-training-for-speech-foundation-models-2409.10788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-prediction-targets-in-masked-pre-training-for-speech-foundation-models-2409.10788"/></url>
<url><loc>https://scifaro.com/en/abs/speaker-ipl-unsupervised-learning-of-speaker-characteristics-with-i-vector-based-pseudo-labels-2409.10791</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speaker-ipl-unsupervised-learning-of-speaker-characteristics-with-i-vector-based-pseudo-labels-2409.10791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speaker-ipl-unsupervised-learning-of-speaker-characteristics-with-i-vector-based-pseudo-labels-2409.10791"/></url>
<url><loc>https://scifaro.com/en/abs/ezaudio-enhancing-text-to-audio-generation-with-efficient-diffusion-transformer-2409.10819</loc><lastmod>2025-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ezaudio-enhancing-text-to-audio-generation-with-efficient-diffusion-transformer-2409.10819"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ezaudio-enhancing-text-to-audio-generation-with-efficient-diffusion-transformer-2409.10819"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-code-switched-text-to-speech-synthesis-capability-in-large-language-models-with-only-monolingual-corpora-2409.10969</loc><lastmod>2025-08-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-code-switched-text-to-speech-synthesis-capability-in-large-language-models-with-only-monolingual-corpora-2409.10969"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-code-switched-text-to-speech-synthesis-capability-in-large-language-models-with-only-monolingual-corpora-2409.10969"/></url>
<url><loc>https://scifaro.com/en/abs/improving-speech-emotion-recognition-in-under-resourced-languages-via-speech-to-speech-translation-with-bootstrapping-data-selection-2409.10985</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-in-under-resourced-languages-via-speech-to-speech-translation-with-bootstrapping-data-selection-2409.10985"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-speech-emotion-recognition-in-under-resourced-languages-via-speech-to-speech-translation-with-bootstrapping-data-selection-2409.10985"/></url>
<url><loc>https://scifaro.com/en/abs/synthsod-developing-an-heterogeneous-dataset-for-orchestra-music-source-separation-2409.10995</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthsod-developing-an-heterogeneous-dataset-for-orchestra-music-source-separation-2409.10995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthsod-developing-an-heterogeneous-dataset-for-orchestra-music-source-separation-2409.10995"/></url>
<url><loc>https://scifaro.com/en/abs/an-explainable-probabilistic-attribute-embedding-approach-for-spoofed-speech-characterization-2409.11027</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-explainable-probabilistic-attribute-embedding-approach-for-spoofed-speech-characterization-2409.11027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-explainable-probabilistic-attribute-embedding-approach-for-spoofed-speech-characterization-2409.11027"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-text-to-speech-augmentation-for-automatic-speech-recognition-on-low-resource-accented-speech-corpora-2409.11107</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-augmentation-for-automatic-speech-recognition-on-low-resource-accented-speech-corpora-2409.11107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-text-to-speech-augmentation-for-automatic-speech-recognition-on-low-resource-accented-speech-corpora-2409.11107"/></url>
<url><loc>https://scifaro.com/en/abs/ideal-llm-integrating-dual-encoders-and-language-adapted-llm-for-multilingual-speech-to-text-2409.11214</loc><lastmod>2024-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ideal-llm-integrating-dual-encoders-and-language-adapted-llm-for-multilingual-speech-to-text-2409.11214"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ideal-llm-integrating-dual-encoders-and-language-adapted-llm-for-multilingual-speech-to-text-2409.11214"/></url>
<url><loc>https://scifaro.com/en/abs/m-best-rq-a-multi-channel-speech-foundation-model-for-smart-glasses-2409.11494</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m-best-rq-a-multi-channel-speech-foundation-model-for-smart-glasses-2409.11494"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m-best-rq-a-multi-channel-speech-foundation-model-for-smart-glasses-2409.11494"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-unit-based-masking-for-improving-disentanglement-in-voice-conversion-2409.11560</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-unit-based-masking-for-improving-disentanglement-in-voice-conversion-2409.11560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-unit-based-masking-for-improving-disentanglement-in-voice-conversion-2409.11560"/></url>
<url><loc>https://scifaro.com/en/abs/dense-tsnet-dense-connected-two-stage-structure-for-ultra-lightweight-speech-enhancement-2409.11725</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dense-tsnet-dense-connected-two-stage-structure-for-ultra-lightweight-speech-enhancement-2409.11725"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dense-tsnet-dense-connected-two-stage-structure-for-ultra-lightweight-speech-enhancement-2409.11725"/></url>
<url><loc>https://scifaro.com/en/abs/performance-and-robustness-of-signal-dependent-vs-signal-independent-binaural-signal-matching-with-wearable-microphone-arrays-2409.11731</loc><lastmod>2025-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/performance-and-robustness-of-signal-dependent-vs-signal-independent-binaural-signal-matching-with-wearable-microphone-arrays-2409.11731"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/performance-and-robustness-of-signal-dependent-vs-signal-independent-binaural-signal-matching-with-wearable-microphone-arrays-2409.11731"/></url>
<url><loc>https://scifaro.com/en/abs/conformal-prediction-for-manifold-based-source-localization-with-gaussian-processes-2409.11804</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/conformal-prediction-for-manifold-based-source-localization-with-gaussian-processes-2409.11804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/conformal-prediction-for-manifold-based-source-localization-with-gaussian-processes-2409.11804"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-an-inter-pausal-unit-ipu-based-approach-for-indic-end-to-end-tts-systems-2409.11915</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-an-inter-pausal-unit-ipu-based-approach-for-indic-end-to-end-tts-systems-2409.11915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-an-inter-pausal-unit-ipu-based-approach-for-indic-end-to-end-tts-systems-2409.11915"/></url>
<url><loc>https://scifaro.com/en/abs/low-frame-rate-speech-codec-a-codec-designed-for-fast-high-quality-speech-llm-training-and-inference-2409.12117</loc><lastmod>2024-09-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-frame-rate-speech-codec-a-codec-designed-for-fast-high-quality-speech-llm-training-and-inference-2409.12117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-frame-rate-speech-codec-a-codec-designed-for-fast-high-quality-speech-llm-training-and-inference-2409.12117"/></url>
<url><loc>https://scifaro.com/en/abs/meta-cat-speaker-informed-speech-embeddings-via-meta-information-concatenation-for-multi-talker-asr-2409.12352</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-cat-speaker-informed-speech-embeddings-via-meta-information-concatenation-for-multi-talker-asr-2409.12352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-cat-speaker-informed-speech-embeddings-via-meta-information-concatenation-for-multi-talker-asr-2409.12352"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audiovisual-speech-recognition-models-with-mixture-of-experts-2409.12370</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audiovisual-speech-recognition-models-with-mixture-of-experts-2409.12370"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audiovisual-speech-recognition-models-with-mixture-of-experts-2409.12370"/></url>
<url><loc>https://scifaro.com/en/abs/disentangling-speakers-in-multi-talker-speech-recognition-with-speaker-aware-ctc-2409.12388</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangling-speakers-in-multi-talker-speech-recognition-with-speaker-aware-ctc-2409.12388"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangling-speakers-in-multi-talker-speech-recognition-with-speaker-aware-ctc-2409.12388"/></url>
<url><loc>https://scifaro.com/en/abs/deft-mamba-universal-multichannel-sound-separation-and-polyphonic-audio-classification-2409.12413</loc><lastmod>2025-09-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deft-mamba-universal-multichannel-sound-separation-and-polyphonic-audio-classification-2409.12413"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deft-mamba-universal-multichannel-sound-separation-and-polyphonic-audio-classification-2409.12413"/></url>
<url><loc>https://scifaro.com/en/abs/multichannel-to-multichannel-target-sound-extraction-using-direction-and-timestamp-clues-2409.12415</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multichannel-to-multichannel-target-sound-extraction-using-direction-and-timestamp-clues-2409.12415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multichannel-to-multichannel-target-sound-extraction-using-direction-and-timestamp-clues-2409.12415"/></url>
<url><loc>https://scifaro.com/en/abs/speech-declipping-transformer-with-complex-spectrogram-and-learnerble-temporal-features-2409.12416</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-declipping-transformer-with-complex-spectrogram-and-learnerble-temporal-features-2409.12416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-declipping-transformer-with-complex-spectrogram-and-learnerble-temporal-features-2409.12416"/></url>
<url><loc>https://scifaro.com/en/abs/geometry-constrained-eeg-channel-selection-for-brain-assisted-speech-enhancement-2409.12520</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/geometry-constrained-eeg-channel-selection-for-brain-assisted-speech-enhancement-2409.12520"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/geometry-constrained-eeg-channel-selection-for-brain-assisted-speech-enhancement-2409.12520"/></url>
<url><loc>https://scifaro.com/en/abs/audiocomposer-towards-fine-grained-audio-generation-with-natural-language-descriptions-2409.12560</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiocomposer-towards-fine-grained-audio-generation-with-natural-language-descriptions-2409.12560"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiocomposer-towards-fine-grained-audio-generation-with-natural-language-descriptions-2409.12560"/></url>
<url><loc>https://scifaro.com/en/abs/ndvq-robust-neural-audio-codec-with-normal-distribution-based-vector-quantization-2409.12717</loc><lastmod>2024-09-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ndvq-robust-neural-audio-codec-with-normal-distribution-based-vector-quantization-2409.12717"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ndvq-robust-neural-audio-codec-with-normal-distribution-based-vector-quantization-2409.12717"/></url>
<url><loc>https://scifaro.com/en/abs/diffssd-a-diffusion-based-dataset-for-speech-forensics-2409.13049</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffssd-a-diffusion-based-dataset-for-speech-forensics-2409.13049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffssd-a-diffusion-based-dataset-for-speech-forensics-2409.13049"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-audio-only-data-for-text-queried-target-sound-extraction-2409.13152</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-audio-only-data-for-text-queried-target-sound-extraction-2409.13152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-audio-only-data-for-text-queried-target-sound-extraction-2409.13152"/></url>
<url><loc>https://scifaro.com/en/abs/lisennet-lightweight-sub-band-and-dual-path-modeling-for-real-time-speech-enhancement-2409.13285</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lisennet-lightweight-sub-band-and-dual-path-modeling-for-real-time-speech-enhancement-2409.13285"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lisennet-lightweight-sub-band-and-dual-path-modeling-for-real-time-speech-enhancement-2409.13285"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-text-queried-sound-event-detection-with-audio-source-separation-2409.13292</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-text-queried-sound-event-detection-with-audio-source-separation-2409.13292"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-text-queried-sound-event-detection-with-audio-source-separation-2409.13292"/></url>
<url><loc>https://scifaro.com/en/abs/neural-directional-filtering-far-field-directivity-control-with-a-small-microphone-array-2409.13502</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-directional-filtering-far-field-directivity-control-with-a-small-microphone-array-2409.13502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-directional-filtering-far-field-directivity-control-with-a-small-microphone-array-2409.13502"/></url>
<url><loc>https://scifaro.com/en/abs/time-and-tokens-benchmarking-end-to-end-speech-dysfluency-detection-2409.13582</loc><lastmod>2024-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-and-tokens-benchmarking-end-to-end-speech-dysfluency-detection-2409.13582"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-and-tokens-benchmarking-end-to-end-speech-dysfluency-detection-2409.13582"/></url>
<url><loc>https://scifaro.com/en/abs/gtsinger-a-global-multi-technique-singing-corpus-with-realistic-music-scores-for-all-singing-tasks-2409.13832</loc><lastmod>2026-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gtsinger-a-global-multi-technique-singing-corpus-with-realistic-music-scores-for-all-singing-tasks-2409.13832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gtsinger-a-global-multi-technique-singing-corpus-with-realistic-music-scores-for-all-singing-tasks-2409.13832"/></url>
<url><loc>https://scifaro.com/en/abs/zero-shot-cross-lingual-voice-transfer-for-tts-2409.13910</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zero-shot-cross-lingual-voice-transfer-for-tts-2409.13910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zero-shot-cross-lingual-voice-transfer-for-tts-2409.13910"/></url>
<url><loc>https://scifaro.com/en/abs/semi-intrusive-audio-evaluation-casting-non-intrusive-assessment-as-a-multi-modal-text-prediction-task-2409.14069</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-intrusive-audio-evaluation-casting-non-intrusive-assessment-as-a-multi-modal-text-prediction-task-2409.14069"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-intrusive-audio-evaluation-casting-non-intrusive-assessment-as-a-multi-modal-text-prediction-task-2409.14069"/></url>
<url><loc>https://scifaro.com/en/abs/codec-superb-slt-2024-a-lightweight-benchmark-for-neural-audio-codec-models-2409.14085</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/codec-superb-slt-2024-a-lightweight-benchmark-for-neural-audio-codec-models-2409.14085"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/codec-superb-slt-2024-a-lightweight-benchmark-for-neural-audio-codec-models-2409.14085"/></url>
<url><loc>https://scifaro.com/en/abs/are-music-foundation-models-better-at-singing-voice-deepfake-detection-far-better-fuse-them-with-speech-foundation-models-2409.14131</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-music-foundation-models-better-at-singing-voice-deepfake-detection-far-better-fuse-them-with-speech-foundation-models-2409.14131"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-music-foundation-models-better-at-singing-voice-deepfake-detection-far-better-fuse-them-with-speech-foundation-models-2409.14131"/></url>
<url><loc>https://scifaro.com/en/abs/strong-alone-stronger-together-synergizing-modality-binding-foundation-models-with-optimal-transport-for-non-verbal-emotion-recognition-2409.14221</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/strong-alone-stronger-together-synergizing-modality-binding-foundation-models-with-optimal-transport-for-non-verbal-emotion-recognition-2409.14221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/strong-alone-stronger-together-synergizing-modality-binding-foundation-models-with-optimal-transport-for-non-verbal-emotion-recognition-2409.14221"/></url>
<url><loc>https://scifaro.com/en/abs/avengers-assemble-amalgamation-of-non-semantic-features-for-depression-detection-2409.14312</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/avengers-assemble-amalgamation-of-non-semantic-features-for-depression-detection-2409.14312"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/avengers-assemble-amalgamation-of-non-semantic-features-for-depression-detection-2409.14312"/></url>
<url><loc>https://scifaro.com/en/abs/improved-direction-of-arrival-estimations-with-a-wearable-microphone-array-for-dynamic-environments-by-reliability-weighting-2409.14346</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improved-direction-of-arrival-estimations-with-a-wearable-microphone-array-for-dynamic-environments-by-reliability-weighting-2409.14346"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improved-direction-of-arrival-estimations-with-a-wearable-microphone-array-for-dynamic-environments-by-reliability-weighting-2409.14346"/></url>
<url><loc>https://scifaro.com/en/abs/a-feature-engineering-approach-for-literary-and-colloquial-tamil-speech-classification-using-1d-cnn-2409.14348</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-feature-engineering-approach-for-literary-and-colloquial-tamil-speech-classification-using-1d-cnn-2409.14348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-feature-engineering-approach-for-literary-and-colloquial-tamil-speech-classification-using-1d-cnn-2409.14348"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-word-discovery-boundary-detection-with-clustering-vs-dynamic-programming-2409.14486</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-word-discovery-boundary-detection-with-clustering-vs-dynamic-programming-2409.14486"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-word-discovery-boundary-detection-with-clustering-vs-dynamic-programming-2409.14486"/></url>
<url><loc>https://scifaro.com/en/abs/robust-audio-visual-speech-enhancement-correcting-misassignments-in-complex-environments-with-advanced-post-processing-2409.14554</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-audio-visual-speech-enhancement-correcting-misassignments-in-complex-environments-with-advanced-post-processing-2409.14554"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-audio-visual-speech-enhancement-correcting-misassignments-in-complex-environments-with-advanced-post-processing-2409.14554"/></url>
<url><loc>https://scifaro.com/en/abs/video-to-audio-generation-with-fine-grained-temporal-semantics-2409.14709</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/video-to-audio-generation-with-fine-grained-temporal-semantics-2409.14709"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/video-to-audio-generation-with-fine-grained-temporal-semantics-2409.14709"/></url>
<url><loc>https://scifaro.com/en/abs/room-impulse-responses-help-attackers-to-evade-deep-fake-detection-2409.14712</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-impulse-responses-help-attackers-to-evade-deep-fake-detection-2409.14712"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-impulse-responses-help-attackers-to-evade-deep-fake-detection-2409.14712"/></url>
<url><loc>https://scifaro.com/en/abs/llamapartialspoof-an-llm-driven-fake-speech-dataset-simulating-disinformation-generation-2409.14743</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llamapartialspoof-an-llm-driven-fake-speech-dataset-simulating-disinformation-generation-2409.14743"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llamapartialspoof-an-llm-driven-fake-speech-dataset-simulating-disinformation-generation-2409.14743"/></url>
<url><loc>https://scifaro.com/en/abs/ca-mhfa-a-context-aware-multi-head-factorized-attentive-pooling-for-ssl-based-speaker-verification-2409.15234</loc><lastmod>2024-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ca-mhfa-a-context-aware-multi-head-factorized-attentive-pooling-for-ssl-based-speaker-verification-2409.15234"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ca-mhfa-a-context-aware-multi-head-factorized-attentive-pooling-for-ssl-based-speaker-verification-2409.15234"/></url>
<url><loc>https://scifaro.com/en/abs/equivariance-based-self-supervised-learning-for-audio-signal-recovery-from-clipped-measurements-2409.15283</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/equivariance-based-self-supervised-learning-for-audio-signal-recovery-from-clipped-measurements-2409.15283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/equivariance-based-self-supervised-learning-for-audio-signal-recovery-from-clipped-measurements-2409.15283"/></url>
<url><loc>https://scifaro.com/en/abs/wavetransfer-a-flexible-end-to-end-multi-instrument-timbre-transfer-with-diffusion-2409.15321</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavetransfer-a-flexible-end-to-end-multi-instrument-timbre-transfer-with-diffusion-2409.15321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavetransfer-a-flexible-end-to-end-multi-instrument-timbre-transfer-with-diffusion-2409.15321"/></url>
<url><loc>https://scifaro.com/en/abs/a-large-dataset-of-spontaneous-speech-with-the-accent-spoken-in-s-ao-paulo-for-automatic-speech-recognition-evaluation-2409.15350</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-large-dataset-of-spontaneous-speech-with-the-accent-spoken-in-s-ao-paulo-for-automatic-speech-recognition-evaluation-2409.15350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-large-dataset-of-spontaneous-speech-with-the-accent-spoken-in-s-ao-paulo-for-automatic-speech-recognition-evaluation-2409.15350"/></url>
<url><loc>https://scifaro.com/en/abs/contextualization-of-asr-with-llm-using-phonetic-retrieval-based-augmentation-2409.15353</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/contextualization-of-asr-with-llm-using-phonetic-retrieval-based-augmentation-2409.15353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/contextualization-of-asr-with-llm-using-phonetic-retrieval-based-augmentation-2409.15353"/></url>
<url><loc>https://scifaro.com/en/abs/tcg-crest-system-description-for-the-second-displace-challenge-2409.15356</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tcg-crest-system-description-for-the-second-displace-challenge-2409.15356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tcg-crest-system-description-for-the-second-displace-challenge-2409.15356"/></url>
<url><loc>https://scifaro.com/en/abs/a-joint-spectro-temporal-relational-thinking-based-acoustic-modeling-framework-2409.15357</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-joint-spectro-temporal-relational-thinking-based-acoustic-modeling-framework-2409.15357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-joint-spectro-temporal-relational-thinking-based-acoustic-modeling-framework-2409.15357"/></url>
<url><loc>https://scifaro.com/en/abs/toward-automated-clinical-transcriptions-2409.15378</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-automated-clinical-transcriptions-2409.15378"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-automated-clinical-transcriptions-2409.15378"/></url>
<url><loc>https://scifaro.com/en/abs/the-parlaspeech-collection-of-automatically-generated-speech-and-text-datasets-from-parliamentary-proceedings-2409.15397</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-parlaspeech-collection-of-automatically-generated-speech-and-text-datasets-from-parliamentary-proceedings-2409.15397"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-parlaspeech-collection-of-automatically-generated-speech-and-text-datasets-from-parliamentary-proceedings-2409.15397"/></url>
<url><loc>https://scifaro.com/en/abs/blind-localization-of-early-room-reflections-with-arbitrary-microphone-array-2409.15484</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-localization-of-early-room-reflections-with-arbitrary-microphone-array-2409.15484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-localization-of-early-room-reflections-with-arbitrary-microphone-array-2409.15484"/></url>
<url><loc>https://scifaro.com/en/abs/addressing-emotion-bias-in-music-emotion-recognition-and-generation-with-frechet-audio-distance-2409.15545</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/addressing-emotion-bias-in-music-emotion-recognition-and-generation-with-frechet-audio-distance-2409.15545"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/addressing-emotion-bias-in-music-emotion-recognition-and-generation-with-frechet-audio-distance-2409.15545"/></url>
<url><loc>https://scifaro.com/en/abs/revise-reason-and-recognize-llm-based-emotion-recognition-via-emotion-specific-prompts-and-asr-error-correction-2409.15551</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revise-reason-and-recognize-llm-based-emotion-recognition-via-emotion-specific-prompts-and-asr-error-correction-2409.15551"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revise-reason-and-recognize-llm-based-emotion-recognition-via-emotion-specific-prompts-and-asr-error-correction-2409.15551"/></url>
<url><loc>https://scifaro.com/en/abs/safe-guard-an-llm-agent-for-real-time-voice-based-hate-speech-detection-in-social-virtual-reality-2409.15623</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/safe-guard-an-llm-agent-for-real-time-voice-based-hate-speech-detection-in-social-virtual-reality-2409.15623"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/safe-guard-an-llm-agent-for-real-time-voice-based-hate-speech-detection-in-social-virtual-reality-2409.15623"/></url>
<url><loc>https://scifaro.com/en/abs/language-based-audio-moment-retrieval-2409.15672</loc><lastmod>2025-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-based-audio-moment-retrieval-2409.15672"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-based-audio-moment-retrieval-2409.15672"/></url>
<url><loc>https://scifaro.com/en/abs/stylefusion-tts-multimodal-style-control-and-enhanced-feature-fusion-for-zero-shot-text-to-speech-synthesis-2409.15741</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stylefusion-tts-multimodal-style-control-and-enhanced-feature-fusion-for-zero-shot-text-to-speech-synthesis-2409.15741"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stylefusion-tts-multimodal-style-control-and-enhanced-feature-fusion-for-zero-shot-text-to-speech-synthesis-2409.15741"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-open-set-speaker-identification-through-rapid-tuning-with-speaker-reciprocal-points-and-negative-sample-2409.15742</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-open-set-speaker-identification-through-rapid-tuning-with-speaker-reciprocal-points-and-negative-sample-2409.15742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-open-set-speaker-identification-through-rapid-tuning-with-speaker-reciprocal-points-and-negative-sample-2409.15742"/></url>
<url><loc>https://scifaro.com/en/abs/representation-loss-minimization-with-randomized-selection-strategy-for-efficient-environmental-fake-audio-detection-2409.15767</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representation-loss-minimization-with-randomized-selection-strategy-for-efficient-environmental-fake-audio-detection-2409.15767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representation-loss-minimization-with-randomized-selection-strategy-for-efficient-environmental-fake-audio-detection-2409.15767"/></url>
<url><loc>https://scifaro.com/en/abs/m-vec-matryoshka-speaker-embeddings-with-flexible-dimensions-2409.15782</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m-vec-matryoshka-speaker-embeddings-with-flexible-dimensions-2409.15782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m-vec-matryoshka-speaker-embeddings-with-flexible-dimensions-2409.15782"/></url>
<url><loc>https://scifaro.com/en/abs/wesep-a-scalable-and-flexible-toolkit-towards-generalizable-target-speaker-extraction-2409.15799</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wesep-a-scalable-and-flexible-toolkit-towards-generalizable-target-speaker-extraction-2409.15799"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wesep-a-scalable-and-flexible-toolkit-towards-generalizable-target-speaker-extraction-2409.15799"/></url>
<url><loc>https://scifaro.com/en/abs/whisper-in-medusa-s-ear-multi-head-efficient-decoding-for-transformer-based-asr-2409.15869</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whisper-in-medusa-s-ear-multi-head-efficient-decoding-for-transformer-based-asr-2409.15869"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whisper-in-medusa-s-ear-multi-head-efficient-decoding-for-transformer-based-asr-2409.15869"/></url>
<url><loc>https://scifaro.com/en/abs/interpolation-filter-design-for-sample-rate-independent-audio-effect-rnns-2409.15884</loc><lastmod>2025-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interpolation-filter-design-for-sample-rate-independent-audio-effect-rnns-2409.15884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interpolation-filter-design-for-sample-rate-independent-audio-effect-rnns-2409.15884"/></url>
<url><loc>https://scifaro.com/en/abs/espnet-codec-comprehensive-training-and-evaluation-of-neural-codecs-for-audio-music-and-speech-2409.15897</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/espnet-codec-comprehensive-training-and-evaluation-of-neural-codecs-for-audio-music-and-speech-2409.15897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/espnet-codec-comprehensive-training-and-evaluation-of-neural-codecs-for-audio-music-and-speech-2409.15897"/></url>
<url><loc>https://scifaro.com/en/abs/tcsinger-zero-shot-singing-voice-synthesis-with-style-transfer-and-multi-level-style-control-2409.15977</loc><lastmod>2025-06-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tcsinger-zero-shot-singing-voice-synthesis-with-style-transfer-and-multi-level-style-control-2409.15977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tcsinger-zero-shot-singing-voice-synthesis-with-style-transfer-and-multi-level-style-control-2409.15977"/></url>
<url><loc>https://scifaro.com/en/abs/scenario-of-use-scheme-threat-model-specification-for-speaker-privacy-protection-in-the-medical-domain-2409.16106</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scenario-of-use-scheme-threat-model-specification-for-speaker-privacy-protection-in-the-medical-domain-2409.16106"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scenario-of-use-scheme-threat-model-specification-for-speaker-privacy-protection-in-the-medical-domain-2409.16106"/></url>
<url><loc>https://scifaro.com/en/abs/generative-speech-foundation-model-pretraining-for-high-quality-speech-extraction-and-restoration-2409.16117</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-speech-foundation-model-pretraining-for-high-quality-speech-extraction-and-restoration-2409.16117"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-speech-foundation-model-pretraining-for-high-quality-speech-extraction-and-restoration-2409.16117"/></url>
<url><loc>https://scifaro.com/en/abs/evaluation-of-speech-foundation-models-for-asr-on-child-adult-conversations-in-autism-diagnostic-sessions-2409.16135</loc><lastmod>2025-08-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluation-of-speech-foundation-models-for-asr-on-child-adult-conversations-in-autism-diagnostic-sessions-2409.16135"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluation-of-speech-foundation-models-for-asr-on-child-adult-conversations-in-autism-diagnostic-sessions-2409.16135"/></url>
<url><loc>https://scifaro.com/en/abs/an-explicit-consistency-preserving-loss-function-for-phase-reconstruction-and-speech-enhancement-2409.16282</loc><lastmod>2024-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-explicit-consistency-preserving-loss-function-for-phase-reconstruction-and-speech-enhancement-2409.16282"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-explicit-consistency-preserving-loss-function-for-phase-reconstruction-and-speech-enhancement-2409.16282"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-training-of-self-supervised-speech-foundation-models-on-a-compute-budget-2409.16295</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-training-of-self-supervised-speech-foundation-models-on-a-compute-budget-2409.16295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-training-of-self-supervised-speech-foundation-models-on-a-compute-budget-2409.16295"/></url>
<url><loc>https://scifaro.com/en/abs/how-redundant-is-the-transformer-stack-in-speech-representation-models-2409.16302</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-redundant-is-the-transformer-stack-in-speech-representation-models-2409.16302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-redundant-is-the-transformer-stack-in-speech-representation-models-2409.16302"/></url>
<url><loc>https://scifaro.com/en/abs/a-literature-review-of-keyword-spotting-technologies-for-urdu-2409.16317</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-literature-review-of-keyword-spotting-technologies-for-urdu-2409.16317"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-literature-review-of-keyword-spotting-technologies-for-urdu-2409.16317"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-within-class-variation-issue-in-alzheimer-s-disease-detection-2409.16322</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-within-class-variation-issue-in-alzheimer-s-disease-detection-2409.16322"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-within-class-variation-issue-in-alzheimer-s-disease-detection-2409.16322"/></url>
<url><loc>https://scifaro.com/en/abs/enabling-auditory-large-language-models-for-automatic-speech-quality-evaluation-2409.16644</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enabling-auditory-large-language-models-for-automatic-speech-quality-evaluation-2409.16644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enabling-auditory-large-language-models-for-automatic-speech-quality-evaluation-2409.16644"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-rescoring-with-large-speech-text-foundation-models-2409.16654</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-rescoring-with-large-speech-text-foundation-models-2409.16654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-rescoring-with-large-speech-text-foundation-models-2409.16654"/></url>
<url><loc>https://scifaro.com/en/abs/emotional-dimension-control-in-language-model-based-text-to-speech-spanning-a-broad-spectrum-of-human-emotions-2409.16681</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotional-dimension-control-in-language-model-based-text-to-speech-spanning-a-broad-spectrum-of-human-emotions-2409.16681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotional-dimension-control-in-language-model-based-text-to-speech-spanning-a-broad-spectrum-of-human-emotions-2409.16681"/></url>
<url><loc>https://scifaro.com/en/abs/incorporating-spatial-cues-in-modular-speaker-diarization-for-multi-channel-multi-party-meetings-2409.16803</loc><lastmod>2024-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incorporating-spatial-cues-in-modular-speaker-diarization-for-multi-channel-multi-party-meetings-2409.16803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incorporating-spatial-cues-in-modular-speaker-diarization-for-multi-channel-multi-party-meetings-2409.16803"/></url>
<url><loc>https://scifaro.com/en/abs/cross-lingual-speech-emotion-recognition-humans-vs-self-supervised-models-2409.16920</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cross-lingual-speech-emotion-recognition-humans-vs-self-supervised-models-2409.16920"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cross-lingual-speech-emotion-recognition-humans-vs-self-supervised-models-2409.16920"/></url>
<url><loc>https://scifaro.com/en/abs/semi-supervised-cognitive-state-classification-from-speech-with-multi-view-pseudo-labeling-2409.16937</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/semi-supervised-cognitive-state-classification-from-speech-with-multi-view-pseudo-labeling-2409.16937"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/semi-supervised-cognitive-state-classification-from-speech-with-multi-view-pseudo-labeling-2409.16937"/></url>
<url><loc>https://scifaro.com/en/abs/mt2kd-towards-a-general-purpose-encoder-for-speech-speaker-and-audio-events-2409.17010</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mt2kd-towards-a-general-purpose-encoder-for-speech-speaker-and-audio-events-2409.17010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mt2kd-towards-a-general-purpose-encoder-for-speech-speaker-and-audio-events-2409.17010"/></url>
<url><loc>https://scifaro.com/en/abs/multiview-canonical-correlation-analysis-for-automatic-pathological-speech-detection-2409.17276</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiview-canonical-correlation-analysis-for-automatic-pathological-speech-detection-2409.17276"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiview-canonical-correlation-analysis-for-automatic-pathological-speech-detection-2409.17276"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-synthetic-data-for-cross-speaker-style-transfer-in-style-representation-based-tts-2409.17364</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-synthetic-data-for-cross-speaker-style-transfer-in-style-representation-based-tts-2409.17364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-synthetic-data-for-cross-speaker-style-transfer-in-style-representation-based-tts-2409.17364"/></url>
<url><loc>https://scifaro.com/en/abs/description-based-controllable-text-to-speech-with-cross-lingual-voice-control-2409.17452</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/description-based-controllable-text-to-speech-with-cross-lingual-voice-control-2409.17452"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/description-based-controllable-text-to-speech-with-cross-lingual-voice-control-2409.17452"/></url>
<url><loc>https://scifaro.com/en/abs/flowmac-conditional-flow-matching-for-audio-coding-at-low-bit-rates-2409.17635</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowmac-conditional-flow-matching-for-audio-coding-at-low-bit-rates-2409.17635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowmac-conditional-flow-matching-for-audio-coding-at-low-bit-rates-2409.17635"/></url>
<url><loc>https://scifaro.com/en/abs/paraformer-v2-an-improved-non-autoregressive-transformer-for-noise-robust-speech-recognition-2409.17746</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/paraformer-v2-an-improved-non-autoregressive-transformer-for-noise-robust-speech-recognition-2409.17746"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/paraformer-v2-an-improved-non-autoregressive-transformer-for-noise-robust-speech-recognition-2409.17746"/></url>
<url><loc>https://scifaro.com/en/abs/are-transformers-in-pre-trained-lm-a-good-asr-encoder-an-empirical-study-2409.17750</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/are-transformers-in-pre-trained-lm-a-good-asr-encoder-an-empirical-study-2409.17750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/are-transformers-in-pre-trained-lm-a-good-asr-encoder-an-empirical-study-2409.17750"/></url>
<url><loc>https://scifaro.com/en/abs/mc-semamba-a-simple-multi-channel-extension-of-semamba-2409.17898</loc><lastmod>2024-09-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mc-semamba-a-simple-multi-channel-extension-of-semamba-2409.17898"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mc-semamba-a-simple-multi-channel-extension-of-semamba-2409.17898"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-acoustic-similarity-in-emotional-speech-and-music-via-self-supervised-representations-2409.17899</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-acoustic-similarity-in-emotional-speech-and-music-via-self-supervised-representations-2409.17899"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-acoustic-similarity-in-emotional-speech-and-music-via-self-supervised-representations-2409.17899"/></url>
<url><loc>https://scifaro.com/en/abs/mimii-gen-generative-modeling-approach-for-simulated-evaluation-of-anomalous-sound-detection-system-2409.18542</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mimii-gen-generative-modeling-approach-for-simulated-evaluation-of-anomalous-sound-detection-system-2409.18542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mimii-gen-generative-modeling-approach-for-simulated-evaluation-of-anomalous-sound-detection-system-2409.18542"/></url>
<url><loc>https://scifaro.com/en/abs/the-ieee-is2-2024-music-packet-loss-concealment-challenge-2409.18564</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ieee-is2-2024-music-packet-loss-concealment-challenge-2409.18564"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ieee-is2-2024-music-packet-loss-concealment-challenge-2409.18564"/></url>
<url><loc>https://scifaro.com/en/abs/speech-mamba-long-context-speech-recognition-with-selective-state-spaces-models-2409.18654</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-mamba-long-context-speech-recognition-with-selective-state-spaces-models-2409.18654"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-mamba-long-context-speech-recognition-with-selective-state-spaces-models-2409.18654"/></url>
<url><loc>https://scifaro.com/en/abs/speech-boosting-low-latency-live-speech-enhancement-for-tws-earbuds-2409.18705</loc><lastmod>2024-09-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-boosting-low-latency-live-speech-enhancement-for-tws-earbuds-2409.18705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-boosting-low-latency-live-speech-enhancement-for-tws-earbuds-2409.18705"/></url>
<url><loc>https://scifaro.com/en/abs/text2fx-harnessing-clap-embeddings-for-text-guided-audio-effects-2409.18847</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text2fx-harnessing-clap-embeddings-for-text-guided-audio-effects-2409.18847"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text2fx-harnessing-clap-embeddings-for-text-guided-audio-effects-2409.18847"/></url>
<url><loc>https://scifaro.com/en/abs/probing-mental-health-information-in-speech-foundation-models-2409.19042</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/probing-mental-health-information-in-speech-foundation-models-2409.19042"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/probing-mental-health-information-in-speech-foundation-models-2409.19042"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-and-mitigating-inconsistency-in-discrete-audio-tokens-for-neural-codec-language-models-2409.19283</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-and-mitigating-inconsistency-in-discrete-audio-tokens-for-neural-codec-language-models-2409.19283"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-and-mitigating-inconsistency-in-discrete-audio-tokens-for-neural-codec-language-models-2409.19283"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-long-form-speech-recognition-for-general-speech-in-context-learning-2409.19757</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-long-form-speech-recognition-for-general-speech-in-context-learning-2409.19757"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-long-form-speech-recognition-for-general-speech-in-context-learning-2409.19757"/></url>
<url><loc>https://scifaro.com/en/abs/guitar-pickups-i-analysis-of-the-effect-of-winding-and-wire-gauge-on-single-coil-electric-guitar-pickups-2409.19782</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guitar-pickups-i-analysis-of-the-effect-of-winding-and-wire-gauge-on-single-coil-electric-guitar-pickups-2409.19782"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guitar-pickups-i-analysis-of-the-effect-of-winding-and-wire-gauge-on-single-coil-electric-guitar-pickups-2409.19782"/></url>
<url><loc>https://scifaro.com/en/abs/fine-tuning-automatic-speech-recognition-for-people-with-parkinson-s-an-effective-strategy-for-enhancing-speech-technology-accessibility-2409.19818</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-tuning-automatic-speech-recognition-for-people-with-parkinson-s-an-effective-strategy-for-enhancing-speech-technology-accessibility-2409.19818"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-tuning-automatic-speech-recognition-for-people-with-parkinson-s-an-effective-strategy-for-enhancing-speech-technology-accessibility-2409.19818"/></url>
<url><loc>https://scifaro.com/en/abs/swim-short-window-cnn-integrated-with-mamba-for-eeg-based-auditory-spatial-attention-decoding-2409.19884</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/swim-short-window-cnn-integrated-with-mamba-for-eeg-based-auditory-spatial-attention-decoding-2409.19884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/swim-short-window-cnn-integrated-with-mamba-for-eeg-based-auditory-spatial-attention-decoding-2409.19884"/></url>
<url><loc>https://scifaro.com/en/abs/predictive-speech-recognition-and-end-of-utterance-detection-towards-spoken-dialog-systems-2409.19990</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predictive-speech-recognition-and-end-of-utterance-detection-towards-spoken-dialog-systems-2409.19990"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predictive-speech-recognition-and-end-of-utterance-detection-towards-spoken-dialog-systems-2409.19990"/></url>
<url><loc>https://scifaro.com/en/abs/desta2-developing-instruction-following-speech-language-model-without-speech-instruction-tuning-data-2409.20007</loc><lastmod>2025-07-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/desta2-developing-instruction-following-speech-language-model-without-speech-instruction-tuning-data-2409.20007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/desta2-developing-instruction-following-speech-language-model-without-speech-instruction-tuning-data-2409.20007"/></url>
<url><loc>https://scifaro.com/en/abs/alignment-free-training-for-transducer-based-multi-talker-asr-2409.20301</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignment-free-training-for-transducer-based-multi-talker-asr-2409.20301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignment-free-training-for-transducer-based-multi-talker-asr-2409.20301"/></url>
<url><loc>https://scifaro.com/en/abs/boosting-hybrid-autoregressive-transducer-based-asr-with-internal-acoustic-model-training-and-dual-blank-thresholding-2409.20313</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/boosting-hybrid-autoregressive-transducer-based-asr-with-internal-acoustic-model-training-and-dual-blank-thresholding-2409.20313"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/boosting-hybrid-autoregressive-transducer-based-asr-with-internal-acoustic-model-training-and-dual-blank-thresholding-2409.20313"/></url>
<url><loc>https://scifaro.com/en/abs/proposal-of-protocols-for-speech-materials-acquisition-and-presentation-assisted-by-tools-based-on-structured-test-signals-2409.20516</loc><lastmod>2024-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/proposal-of-protocols-for-speech-materials-acquisition-and-presentation-assisted-by-tools-based-on-structured-test-signals-2409.20516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/proposal-of-protocols-for-speech-materials-acquisition-and-presentation-assisted-by-tools-based-on-structured-test-signals-2409.20516"/></url>
<url><loc>https://scifaro.com/en/abs/feruzaspeech-a-60-hour-uzbek-read-speech-corpus-with-punctuation-casing-and-context-2410.00035</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feruzaspeech-a-60-hour-uzbek-read-speech-corpus-with-punctuation-casing-and-context-2410.00035"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feruzaspeech-a-60-hour-uzbek-read-speech-corpus-with-punctuation-casing-and-context-2410.00035"/></url>
<url><loc>https://scifaro.com/en/abs/moshi-a-speech-text-foundation-model-for-real-time-dialogue-2410.00037</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/moshi-a-speech-text-foundation-model-for-real-time-dialogue-2410.00037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/moshi-a-speech-text-foundation-model-for-real-time-dialogue-2410.00037"/></url>
<url><loc>https://scifaro.com/en/abs/mamba-for-streaming-asr-combined-with-unimodal-aggregation-2410.00070</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mamba-for-streaming-asr-combined-with-unimodal-aggregation-2410.00070"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mamba-for-streaming-asr-combined-with-unimodal-aggregation-2410.00070"/></url>
<url><loc>https://scifaro.com/en/abs/multi-scale-temporal-transformer-for-speech-emotion-recognition-2410.00390</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-scale-temporal-transformer-for-speech-emotion-recognition-2410.00390"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-scale-temporal-transformer-for-speech-emotion-recognition-2410.00390"/></url>
<url><loc>https://scifaro.com/en/abs/pre-training-with-synthetic-patterns-for-audio-2410.00511</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pre-training-with-synthetic-patterns-for-audio-2410.00511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pre-training-with-synthetic-patterns-for-audio-2410.00511"/></url>
<url><loc>https://scifaro.com/en/abs/wanna-hear-your-voice-a-sample-is-all-we-need-2410.00527</loc><lastmod>2025-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wanna-hear-your-voice-a-sample-is-all-we-need-2410.00527"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wanna-hear-your-voice-a-sample-is-all-we-need-2410.00527"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-speech-recognition-with-pre-trained-masked-language-model-2410.00528</loc><lastmod>2024-10-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-pre-trained-masked-language-model-2410.00528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-speech-recognition-with-pre-trained-masked-language-model-2410.00528"/></url>
<url><loc>https://scifaro.com/en/abs/the-conformer-encoder-may-reverse-the-time-dimension-2410.00680</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-conformer-encoder-may-reverse-the-time-dimension-2410.00680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-conformer-encoder-may-reverse-the-time-dimension-2410.00680"/></url>
<url><loc>https://scifaro.com/en/abs/augmentation-through-laundering-attacks-for-audio-spoof-detection-2410.01108</loc><lastmod>2025-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmentation-through-laundering-attacks-for-audio-spoof-detection-2410.01108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmentation-through-laundering-attacks-for-audio-spoof-detection-2410.01108"/></url>
<url><loc>https://scifaro.com/en/abs/restorative-speech-enhancement-a-progressive-approach-using-se-and-codec-modules-2410.01150</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/restorative-speech-enhancement-a-progressive-approach-using-se-and-codec-modules-2410.01150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/restorative-speech-enhancement-a-progressive-approach-using-se-and-codec-modules-2410.01150"/></url>
<url><loc>https://scifaro.com/en/abs/frozen-large-language-models-can-perceive-paralinguistic-aspects-of-speech-2410.01162</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/frozen-large-language-models-can-perceive-paralinguistic-aspects-of-speech-2410.01162"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/frozen-large-language-models-can-perceive-paralinguistic-aspects-of-speech-2410.01162"/></url>
<url><loc>https://scifaro.com/en/abs/hrtf-estimation-using-a-score-based-prior-2410.01562</loc><lastmod>2024-10-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hrtf-estimation-using-a-score-based-prior-2410.01562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hrtf-estimation-using-a-score-based-prior-2410.01562"/></url>
<url><loc>https://scifaro.com/en/abs/a-gen-ai-framework-for-medical-note-generation-2410.01841</loc><lastmod>2025-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-gen-ai-framework-for-medical-note-generation-2410.01841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-gen-ai-framework-for-medical-note-generation-2410.01841"/></url>
<url><loc>https://scifaro.com/en/abs/synthio-augmenting-small-scale-audio-classification-datasets-with-synthetic-data-2410.02056</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synthio-augmenting-small-scale-audio-classification-datasets-with-synthetic-data-2410.02056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synthio-augmenting-small-scale-audio-classification-datasets-with-synthetic-data-2410.02056"/></url>
<url><loc>https://scifaro.com/en/abs/state-of-the-art-embeddings-with-video-free-segmentation-of-the-source-voxceleb-data-2410.02364</loc><lastmod>2025-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/state-of-the-art-embeddings-with-video-free-segmentation-of-the-source-voxceleb-data-2410.02364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/state-of-the-art-embeddings-with-video-free-segmentation-of-the-source-voxceleb-data-2410.02364"/></url>
<url><loc>https://scifaro.com/en/abs/ntu-npu-system-for-voice-privacy-2024-challenge-2410.02371</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ntu-npu-system-for-voice-privacy-2024-challenge-2410.02371"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ntu-npu-system-for-voice-privacy-2024-challenge-2410.02371"/></url>
<url><loc>https://scifaro.com/en/abs/fastadasp-multitask-adapted-efficient-inference-for-large-speech-language-model-2410.03007</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fastadasp-multitask-adapted-efficient-inference-for-large-speech-language-model-2410.03007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fastadasp-multitask-adapted-efficient-inference-for-large-speech-language-model-2410.03007"/></url>
<url><loc>https://scifaro.com/en/abs/how-does-the-teacher-rate-observations-from-the-neuropiano-dataset-2410.03139</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-does-the-teacher-rate-observations-from-the-neuropiano-dataset-2410.03139"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-does-the-teacher-rate-observations-from-the-neuropiano-dataset-2410.03139"/></url>
<url><loc>https://scifaro.com/en/abs/multiverse-efficient-and-expressive-zero-shot-multi-task-text-to-speech-2410.03192</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiverse-efficient-and-expressive-zero-shot-multi-task-text-to-speech-2410.03192"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiverse-efficient-and-expressive-zero-shot-multi-task-text-to-speech-2410.03192"/></url>
<url><loc>https://scifaro.com/en/abs/manikin-recorded-cardiopulmonary-sounds-dataset-using-digital-stethoscope-2410.03280</loc><lastmod>2026-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/manikin-recorded-cardiopulmonary-sounds-dataset-using-digital-stethoscope-2410.03280"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/manikin-recorded-cardiopulmonary-sounds-dataset-using-digital-stethoscope-2410.03280"/></url>
<url><loc>https://scifaro.com/en/abs/textless-streaming-speech-to-speech-translation-using-semantic-speech-tokens-2410.03298</loc><lastmod>2024-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/textless-streaming-speech-to-speech-translation-using-semantic-speech-tokens-2410.03298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/textless-streaming-speech-to-speech-translation-using-semantic-speech-tokens-2410.03298"/></url>
<url><loc>https://scifaro.com/en/abs/adversarial-attacks-and-robust-defenses-in-speaker-embedding-based-zero-shot-text-to-speech-system-2410.04017</loc><lastmod>2025-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adversarial-attacks-and-robust-defenses-in-speaker-embedding-based-zero-shot-text-to-speech-system-2410.04017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adversarial-attacks-and-robust-defenses-in-speaker-embedding-based-zero-shot-text-to-speech-system-2410.04017"/></url>
<url><loc>https://scifaro.com/en/abs/enhancement-of-dysarthric-speech-reconstruction-by-contrastive-learning-2410.04092</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancement-of-dysarthric-speech-reconstruction-by-contrastive-learning-2410.04092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancement-of-dysarthric-speech-reconstruction-by-contrastive-learning-2410.04092"/></url>
<url><loc>https://scifaro.com/en/abs/dj-mix-transcription-with-multi-pass-non-negative-matrix-factorization-2410.04198</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dj-mix-transcription-with-multi-pass-non-negative-matrix-factorization-2410.04198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dj-mix-transcription-with-multi-pass-non-negative-matrix-factorization-2410.04198"/></url>
<url><loc>https://scifaro.com/en/abs/hall-e-hierarchical-neural-codec-language-model-for-minute-long-zero-shot-text-to-speech-synthesis-2410.04380</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/hall-e-hierarchical-neural-codec-language-model-for-minute-long-zero-shot-text-to-speech-synthesis-2410.04380"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/hall-e-hierarchical-neural-codec-language-model-for-minute-long-zero-shot-text-to-speech-synthesis-2410.04380"/></url>
<url><loc>https://scifaro.com/en/abs/seginr-segment-wise-implicit-neural-representation-for-sequence-alignment-in-neural-text-to-speech-2410.04690</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seginr-segment-wise-implicit-neural-representation-for-sequence-alignment-in-neural-text-to-speech-2410.04690"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seginr-segment-wise-implicit-neural-representation-for-sequence-alignment-in-neural-text-to-speech-2410.04690"/></url>
<url><loc>https://scifaro.com/en/abs/towards-ultra-low-power-neuromorphic-speech-enhancement-with-spiking-fullsubnet-2410.04785</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-ultra-low-power-neuromorphic-speech-enhancement-with-spiking-fullsubnet-2410.04785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-ultra-low-power-neuromorphic-speech-enhancement-with-spiking-fullsubnet-2410.04785"/></url>
<url><loc>https://scifaro.com/en/abs/a-decade-of-dcase-achievements-practices-evaluations-and-future-challenges-2410.04951</loc><lastmod>2024-10-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-decade-of-dcase-achievements-practices-evaluations-and-future-challenges-2410.04951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-decade-of-dcase-achievements-practices-evaluations-and-future-challenges-2410.04951"/></url>
<url><loc>https://scifaro.com/en/abs/cr-ctc-consistency-regularization-on-ctc-for-improved-speech-recognition-2410.05101</loc><lastmod>2025-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cr-ctc-consistency-regularization-on-ctc-for-improved-speech-recognition-2410.05101"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cr-ctc-consistency-regularization-on-ctc-for-improved-speech-recognition-2410.05101"/></url>
<url><loc>https://scifaro.com/en/abs/editing-music-with-melody-and-text-using-controlnet-for-diffusion-transformer-2410.05151</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/editing-music-with-melody-and-text-using-controlnet-for-diffusion-transformer-2410.05151"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/editing-music-with-melody-and-text-using-controlnet-for-diffusion-transformer-2410.05151"/></url>
<url><loc>https://scifaro.com/en/abs/episodic-fine-tuning-prototypical-networks-for-optimization-based-few-shot-learning-application-to-audio-classification-2410.05302</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/episodic-fine-tuning-prototypical-networks-for-optimization-based-few-shot-learning-application-to-audio-classification-2410.05302"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/episodic-fine-tuning-prototypical-networks-for-optimization-based-few-shot-learning-application-to-audio-classification-2410.05302"/></url>
<url><loc>https://scifaro.com/en/abs/the-ocon-model-an-old-but-gold-solution-for-distributable-supervised-classification-2410.05320</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ocon-model-an-old-but-gold-solution-for-distributable-supervised-classification-2410.05320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ocon-model-an-old-but-gold-solution-for-distributable-supervised-classification-2410.05320"/></url>
<url><loc>https://scifaro.com/en/abs/improving-data-augmentation-based-cross-speaker-style-transfer-for-tts-with-singing-voice-style-filtering-and-f0-matching-2410.05620</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-data-augmentation-based-cross-speaker-style-transfer-for-tts-with-singing-voice-style-filtering-and-f0-matching-2410.05620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-data-augmentation-based-cross-speaker-style-transfer-for-tts-with-singing-voice-style-filtering-and-f0-matching-2410.05620"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-rhythm-formant-analysis-for-indic-language-classification-2410.05724</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-rhythm-formant-analysis-for-indic-language-classification-2410.05724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-rhythm-formant-analysis-for-indic-language-classification-2410.05724"/></url>
<url><loc>https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-mmcsg-challenge-2410.05986</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-mmcsg-challenge-2410.05986"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-ustc-nercslip-systems-for-the-chime-8-mmcsg-challenge-2410.05986"/></url>
<url><loc>https://scifaro.com/en/abs/an-eye-for-an-ear-zero-shot-audio-description-leveraging-an-image-captioner-using-audiovisual-distribution-alignment-2410.05997</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-eye-for-an-ear-zero-shot-audio-description-leveraging-an-image-captioner-using-audiovisual-distribution-alignment-2410.05997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-eye-for-an-ear-zero-shot-audio-description-leveraging-an-image-captioner-using-audiovisual-distribution-alignment-2410.05997"/></url>
<url><loc>https://scifaro.com/en/abs/ls-eend-long-form-streaming-end-to-end-neural-diarization-with-online-attractor-extraction-2410.06670</loc><lastmod>2025-09-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ls-eend-long-form-streaming-end-to-end-neural-diarization-with-online-attractor-extraction-2410.06670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ls-eend-long-form-streaming-end-to-end-neural-diarization-with-online-attractor-extraction-2410.06670"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-training-strategies-for-natural-sounding-speech-synthesis-and-speaker-adaptation-based-on-fastpitch-2410.06787</loc><lastmod>2024-10-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-training-strategies-for-natural-sounding-speech-synthesis-and-speaker-adaptation-based-on-fastpitch-2410.06787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-training-strategies-for-natural-sounding-speech-synthesis-and-speaker-adaptation-based-on-fastpitch-2410.06787"/></url>
<url><loc>https://scifaro.com/en/abs/f5-tts-a-fairytaler-that-fakes-fluent-and-faithful-speech-with-flow-matching-2410.06885</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/f5-tts-a-fairytaler-that-fakes-fluent-and-faithful-speech-with-flow-matching-2410.06885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/f5-tts-a-fairytaler-that-fakes-fluent-and-faithful-speech-with-flow-matching-2410.06885"/></url>
<url><loc>https://scifaro.com/en/abs/swin-bert-a-feature-fusion-system-designed-for-speech-based-alzheimer-s-dementia-detection-2410.07277</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/swin-bert-a-feature-fusion-system-designed-for-speech-based-alzheimer-s-dementia-detection-2410.07277"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/swin-bert-a-feature-fusion-system-designed-for-speech-based-alzheimer-s-dementia-detection-2410.07277"/></url>
<url><loc>https://scifaro.com/en/abs/learn-from-real-reality-defender-s-submission-to-asvspoof5-challenge-2410.07379</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learn-from-real-reality-defender-s-submission-to-asvspoof5-challenge-2410.07379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learn-from-real-reality-defender-s-submission-to-asvspoof5-challenge-2410.07379"/></url>
<url><loc>https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-evaluation-plan-2410.07428</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-evaluation-plan-2410.07428"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-evaluation-plan-2410.07428"/></url>
<url><loc>https://scifaro.com/en/abs/robust-fixed-filter-sound-zone-control-with-audio-based-position-tracking-2410.07935</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-fixed-filter-sound-zone-control-with-audio-based-position-tracking-2410.07935"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-fixed-filter-sound-zone-control-with-audio-based-position-tracking-2410.07935"/></url>
<url><loc>https://scifaro.com/en/abs/sound-zone-control-robust-to-sound-speed-change-2410.07978</loc><lastmod>2024-10-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-zone-control-robust-to-sound-speed-change-2410.07978"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-zone-control-robust-to-sound-speed-change-2410.07978"/></url>
<url><loc>https://scifaro.com/en/abs/window-function-less-dft-with-reduced-noise-and-latency-for-real-time-music-analysis-2410.07982</loc><lastmod>2025-09-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/window-function-less-dft-with-reduced-noise-and-latency-for-real-time-music-analysis-2410.07982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/window-function-less-dft-with-reduced-noise-and-latency-for-real-time-music-analysis-2410.07982"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-asr-based-wav2vec2-for-automated-speech-disorder-assessment-insights-and-analysis-2410.08250</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-asr-based-wav2vec2-for-automated-speech-disorder-assessment-insights-and-analysis-2410.08250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-asr-based-wav2vec2-for-automated-speech-disorder-assessment-insights-and-analysis-2410.08250"/></url>
<url><loc>https://scifaro.com/en/abs/low-bitrate-high-quality-rvqgan-based-discrete-speech-tokenizer-2410.08325</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-bitrate-high-quality-rvqgan-based-discrete-speech-tokenizer-2410.08325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-bitrate-high-quality-rvqgan-based-discrete-speech-tokenizer-2410.08325"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-attention-based-unsupervised-anomalous-sound-detection-exploiting-separable-convolutions-and-angular-loss-2410.08919</loc><lastmod>2024-10-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-attention-based-unsupervised-anomalous-sound-detection-exploiting-separable-convolutions-and-angular-loss-2410.08919"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-attention-based-unsupervised-anomalous-sound-detection-exploiting-separable-convolutions-and-angular-loss-2410.08919"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-infant-crying-detection-with-gradient-boosting-for-improved-emotional-and-mental-health-diagnostics-2410.09236</loc><lastmod>2026-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-infant-crying-detection-with-gradient-boosting-for-improved-emotional-and-mental-health-diagnostics-2410.09236"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-infant-crying-detection-with-gradient-boosting-for-improved-emotional-and-mental-health-diagnostics-2410.09236"/></url>
<url><loc>https://scifaro.com/en/abs/slam-aac-enhancing-audio-captioning-with-paraphrasing-augmentation-and-clap-refine-through-llms-2410.09503</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slam-aac-enhancing-audio-captioning-with-paraphrasing-augmentation-and-clap-refine-through-llms-2410.09503"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slam-aac-enhancing-audio-captioning-with-paraphrasing-augmentation-and-clap-refine-through-llms-2410.09503"/></url>
<url><loc>https://scifaro.com/en/abs/can-we-estimate-purchase-intention-based-on-zero-shot-speech-emotion-recognition-2410.09636</loc><lastmod>2024-10-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-we-estimate-purchase-intention-based-on-zero-shot-speech-emotion-recognition-2410.09636"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-we-estimate-purchase-intention-based-on-zero-shot-speech-emotion-recognition-2410.09636"/></url>
<url><loc>https://scifaro.com/en/abs/in-materia-speech-recognition-2410.10434</loc><lastmod>2025-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/in-materia-speech-recognition-2410.10434"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/in-materia-speech-recognition-2410.10434"/></url>
<url><loc>https://scifaro.com/en/abs/code-drift-towards-idempotent-neural-audio-codecs-2410.11025</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/code-drift-towards-idempotent-neural-audio-codecs-2410.11025"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/code-drift-towards-idempotent-neural-audio-codecs-2410.11025"/></url>
<url><loc>https://scifaro.com/en/abs/dmospeech-direct-metric-optimization-via-distilled-diffusion-model-in-zero-shot-speech-synthesis-2410.11097</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dmospeech-direct-metric-optimization-via-distilled-diffusion-model-in-zero-shot-speech-synthesis-2410.11097"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dmospeech-direct-metric-optimization-via-distilled-diffusion-model-in-zero-shot-speech-synthesis-2410.11097"/></url>
<url><loc>https://scifaro.com/en/abs/darnet-dual-attention-refinement-network-with-spatiotemporal-construction-for-auditory-attention-detection-2410.11181</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/darnet-dual-attention-refinement-network-with-spatiotemporal-construction-for-auditory-attention-detection-2410.11181"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/darnet-dual-attention-refinement-network-with-spatiotemporal-construction-for-auditory-attention-detection-2410.11181"/></url>
<url><loc>https://scifaro.com/en/abs/mini-omni2-towards-open-source-gpt-4o-with-vision-speech-and-duplex-capabilities-2410.11190</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mini-omni2-towards-open-source-gpt-4o-with-vision-speech-and-duplex-capabilities-2410.11190"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mini-omni2-towards-open-source-gpt-4o-with-vision-speech-and-duplex-capabilities-2410.11190"/></url>
<url><loc>https://scifaro.com/en/abs/the-importance-of-spatial-and-spectral-information-in-multiple-speaker-tracking-2410.11453</loc><lastmod>2024-10-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-importance-of-spatial-and-spectral-information-in-multiple-speaker-tracking-2410.11453"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-importance-of-spatial-and-spectral-information-in-multiple-speaker-tracking-2410.11453"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-screening-for-children-with-speech-disorder-using-automatic-speech-recognition-opportunities-and-challenges-2410.11865</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-screening-for-children-with-speech-disorder-using-automatic-speech-recognition-opportunities-and-challenges-2410.11865"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-screening-for-children-with-speech-disorder-using-automatic-speech-recognition-opportunities-and-challenges-2410.11865"/></url>
<url><loc>https://scifaro.com/en/abs/guided-speaker-embedding-2410.12182</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/guided-speaker-embedding-2410.12182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/guided-speaker-embedding-2410.12182"/></url>
<url><loc>https://scifaro.com/en/abs/flashaudio-rectified-flows-for-fast-and-high-fidelity-text-to-audio-generation-2410.12266</loc><lastmod>2025-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flashaudio-rectified-flows-for-fast-and-high-fidelity-text-to-audio-generation-2410.12266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flashaudio-rectified-flows-for-fast-and-high-fidelity-text-to-audio-generation-2410.12266"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-oversmoothing-evaluating-ddpm-and-mse-for-scalable-speech-synthesis-in-asr-2410.12279</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-oversmoothing-evaluating-ddpm-and-mse-for-scalable-speech-synthesis-in-asr-2410.12279"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-oversmoothing-evaluating-ddpm-and-mse-for-scalable-speech-synthesis-in-asr-2410.12279"/></url>
<url><loc>https://scifaro.com/en/abs/ervq-enhanced-residual-vector-quantization-with-intra-and-inter-codebook-optimization-for-neural-audio-codecs-2410.12359</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ervq-enhanced-residual-vector-quantization-with-intra-and-inter-codebook-optimization-for-neural-audio-codecs-2410.12359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ervq-enhanced-residual-vector-quantization-with-intra-and-inter-codebook-optimization-for-neural-audio-codecs-2410.12359"/></url>
<url><loc>https://scifaro.com/en/abs/sifisinger-a-high-fidelity-end-to-end-singing-voice-synthesizer-based-on-source-filter-model-2410.12536</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sifisinger-a-high-fidelity-end-to-end-singing-voice-synthesizer-based-on-source-filter-model-2410.12536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sifisinger-a-high-fidelity-end-to-end-singing-voice-synthesizer-based-on-source-filter-model-2410.12536"/></url>
<url><loc>https://scifaro.com/en/abs/sequifi-mitigating-catastrophic-forgetting-in-speech-emotion-recognition-with-sequential-class-finetuning-2410.12567</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequifi-mitigating-catastrophic-forgetting-in-speech-emotion-recognition-with-sequential-class-finetuning-2410.12567"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequifi-mitigating-catastrophic-forgetting-in-speech-emotion-recognition-with-sequential-class-finetuning-2410.12567"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-speech-and-more-investigating-the-emergent-ability-of-speech-foundation-models-for-classifying-physiological-time-series-signals-2410.12645</loc><lastmod>2024-10-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-speech-and-more-investigating-the-emergent-ability-of-speech-foundation-models-for-classifying-physiological-time-series-signals-2410.12645"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-speech-and-more-investigating-the-emergent-ability-of-speech-foundation-models-for-classifying-physiological-time-series-signals-2410.12645"/></url>
<url><loc>https://scifaro.com/en/abs/attentivemos-a-lightweight-attention-only-model-for-speech-quality-prediction-2410.12675</loc><lastmod>2025-05-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attentivemos-a-lightweight-attention-only-model-for-speech-quality-prediction-2410.12675"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attentivemos-a-lightweight-attention-only-model-for-speech-quality-prediction-2410.12675"/></url>
<url><loc>https://scifaro.com/en/abs/exploiting-longitudinal-speech-sessions-via-voice-assistant-systems-for-early-detection-of-cognitive-decline-2410.12885</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploiting-longitudinal-speech-sessions-via-voice-assistant-systems-for-early-detection-of-cognitive-decline-2410.12885"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploiting-longitudinal-speech-sessions-via-voice-assistant-systems-for-early-detection-of-cognitive-decline-2410.12885"/></url>
<url><loc>https://scifaro.com/en/abs/ai-enhanced-acoustic-analysis-for-comprehensive-biodiversity-monitoring-and-assessment-2410.12897</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ai-enhanced-acoustic-analysis-for-comprehensive-biodiversity-monitoring-and-assessment-2410.12897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ai-enhanced-acoustic-analysis-for-comprehensive-biodiversity-monitoring-and-assessment-2410.12897"/></url>
<url><loc>https://scifaro.com/en/abs/multi-view-multi-task-modeling-with-speech-foundation-models-for-speech-forensic-tasks-2410.12947</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-view-multi-task-modeling-with-speech-foundation-models-for-speech-forensic-tasks-2410.12947"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-view-multi-task-modeling-with-speech-foundation-models-for-speech-forensic-tasks-2410.12947"/></url>
<url><loc>https://scifaro.com/en/abs/using-rlhf-to-align-speech-enhancement-approaches-to-mean-opinion-quality-scores-2410.13182</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/using-rlhf-to-align-speech-enhancement-approaches-to-mean-opinion-quality-scores-2410.13182"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/using-rlhf-to-align-speech-enhancement-approaches-to-mean-opinion-quality-scores-2410.13182"/></url>
<url><loc>https://scifaro.com/en/abs/failing-forward-improving-generative-error-correction-for-asr-with-synthetic-data-and-retrieval-augmentation-2410.13198</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/failing-forward-improving-generative-error-correction-for-asr-with-synthetic-data-and-retrieval-augmentation-2410.13198"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/failing-forward-improving-generative-error-correction-for-asr-with-synthetic-data-and-retrieval-augmentation-2410.13198"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-effective-speaker-property-privacy-protection-in-federated-learning-for-speech-emotion-recognition-2410.13221</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-effective-speaker-property-privacy-protection-in-federated-learning-for-speech-emotion-recognition-2410.13221"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-effective-speaker-property-privacy-protection-in-federated-learning-for-speech-emotion-recognition-2410.13221"/></url>
<url><loc>https://scifaro.com/en/abs/durian-e-2-duration-informed-attention-network-with-adaptive-variational-autoencoder-and-adversarial-learning-for-expressive-text-to-speech-synthesis-2410.13288</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/durian-e-2-duration-informed-attention-network-with-adaptive-variational-autoencoder-and-adversarial-learning-for-expressive-text-to-speech-synthesis-2410.13288"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/durian-e-2-duration-informed-attention-network-with-adaptive-variational-autoencoder-and-adversarial-learning-for-expressive-text-to-speech-synthesis-2410.13288"/></url>
<url><loc>https://scifaro.com/en/abs/dart-disentanglement-of-accent-and-speaker-representation-in-multispeaker-text-to-speech-2410.13342</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dart-disentanglement-of-accent-and-speaker-representation-in-multispeaker-text-to-speech-2410.13342"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dart-disentanglement-of-accent-and-speaker-representation-in-multispeaker-text-to-speech-2410.13342"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-crowdsourced-audio-for-text-to-speech-models-2410.13357</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-crowdsourced-audio-for-text-to-speech-models-2410.13357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-crowdsourced-audio-for-text-to-speech-models-2410.13357"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-audio-to-improve-dialogue-policies-2410.13385</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-audio-to-improve-dialogue-policies-2410.13385"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-audio-to-improve-dialogue-policies-2410.13385"/></url>
<url><loc>https://scifaro.com/en/abs/stcon-system-for-the-chime-8-challenge-2410.13411</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stcon-system-for-the-chime-8-challenge-2410.13411"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stcon-system-for-the-chime-8-challenge-2410.13411"/></url>
<url><loc>https://scifaro.com/en/abs/gan-based-speech-enhancement-for-low-snr-using-latent-feature-conditioning-2410.13599</loc><lastmod>2024-10-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gan-based-speech-enhancement-for-low-snr-using-latent-feature-conditioning-2410.13599"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gan-based-speech-enhancement-for-low-snr-using-latent-feature-conditioning-2410.13599"/></url>
<url><loc>https://scifaro.com/en/abs/align-ulcnet-towards-low-complexity-and-robust-acoustic-echo-and-noise-reduction-2410.13620</loc><lastmod>2025-08-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/align-ulcnet-towards-low-complexity-and-robust-acoustic-echo-and-noise-reduction-2410.13620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/align-ulcnet-towards-low-complexity-and-robust-acoustic-echo-and-noise-reduction-2410.13620"/></url>
<url><loc>https://scifaro.com/en/abs/a-unified-framework-for-collecting-text-to-speech-synthesis-datasets-for-22-indian-languages-2410.14197</loc><lastmod>2024-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-unified-framework-for-collecting-text-to-speech-synthesis-datasets-for-22-indian-languages-2410.14197"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-unified-framework-for-collecting-text-to-speech-synthesis-datasets-for-22-indian-languages-2410.14197"/></url>
<url><loc>https://scifaro.com/en/abs/ac-mix-self-supervised-adaptation-for-low-resource-automatic-speech-recognition-using-agnostic-contrastive-mixup-2410.14910</loc><lastmod>2025-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ac-mix-self-supervised-adaptation-for-low-resource-automatic-speech-recognition-using-agnostic-contrastive-mixup-2410.14910"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ac-mix-self-supervised-adaptation-for-low-resource-automatic-speech-recognition-using-agnostic-contrastive-mixup-2410.14910"/></url>
<url><loc>https://scifaro.com/en/abs/independent-feature-enhanced-crossmodal-fusion-for-match-mismatch-classification-of-speech-stimulus-and-eeg-response-2410.15078</loc><lastmod>2024-10-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/independent-feature-enhanced-crossmodal-fusion-for-match-mismatch-classification-of-speech-stimulus-and-eeg-response-2410.15078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/independent-feature-enhanced-crossmodal-fusion-for-match-mismatch-classification-of-speech-stimulus-and-eeg-response-2410.15078"/></url>
<url><loc>https://scifaro.com/en/abs/lscodec-low-bitrate-and-speaker-decoupled-discrete-speech-codec-2410.15764</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lscodec-low-bitrate-and-speaker-decoupled-discrete-speech-codec-2410.15764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lscodec-low-bitrate-and-speaker-decoupled-discrete-speech-codec-2410.15764"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-from-continuous-features-using-per-token-latent-diffusion-2410.16048</loc><lastmod>2025-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-from-continuous-features-using-per-token-latent-diffusion-2410.16048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-from-continuous-features-using-per-token-latent-diffusion-2410.16048"/></url>
<url><loc>https://scifaro.com/en/abs/multi-level-speaker-representation-for-target-speaker-extraction-2410.16059</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-level-speaker-representation-for-target-speaker-extraction-2410.16059"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-level-speaker-representation-for-target-speaker-extraction-2410.16059"/></url>
<url><loc>https://scifaro.com/en/abs/can-large-audio-language-models-truly-hear-tackling-hallucinations-with-multi-task-assessment-and-stepwise-audio-reasoning-2410.16130</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-large-audio-language-models-truly-hear-tackling-hallucinations-with-multi-task-assessment-and-stepwise-audio-reasoning-2410.16130"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-large-audio-language-models-truly-hear-tackling-hallucinations-with-multi-task-assessment-and-stepwise-audio-reasoning-2410.16130"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-transformer-based-automatic-speech-recognition-for-northern-kurdish-a-pioneering-approach-2410.16330</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-transformer-based-automatic-speech-recognition-for-northern-kurdish-a-pioneering-approach-2410.16330"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-transformer-based-automatic-speech-recognition-for-northern-kurdish-a-pioneering-approach-2410.16330"/></url>
<url><loc>https://scifaro.com/en/abs/ge2e-kws-generalized-end-to-end-training-and-evaluation-for-zero-shot-keyword-spotting-2410.16647</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ge2e-kws-generalized-end-to-end-training-and-evaluation-for-zero-shot-keyword-spotting-2410.16647"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ge2e-kws-generalized-end-to-end-training-and-evaluation-for-zero-shot-keyword-spotting-2410.16647"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-low-resource-asr-through-versatile-tts-bridging-the-data-gap-2410.16726</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-low-resource-asr-through-versatile-tts-bridging-the-data-gap-2410.16726"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-low-resource-asr-through-versatile-tts-bridging-the-data-gap-2410.16726"/></url>
<url><loc>https://scifaro.com/en/abs/can-a-machine-distinguish-high-and-low-amount-of-social-creak-in-speech-2410.17028</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/can-a-machine-distinguish-high-and-low-amount-of-social-creak-in-speech-2410.17028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/can-a-machine-distinguish-high-and-low-amount-of-social-creak-in-speech-2410.17028"/></url>
<url><loc>https://scifaro.com/en/abs/prototype-and-instance-contrastive-learning-for-unsupervised-domain-adaptation-in-speaker-verification-2410.17033</loc><lastmod>2024-10-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prototype-and-instance-contrastive-learning-for-unsupervised-domain-adaptation-in-speaker-verification-2410.17033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prototype-and-instance-contrastive-learning-for-unsupervised-domain-adaptation-in-speaker-verification-2410.17033"/></url>
<url><loc>https://scifaro.com/en/abs/improving-automatic-speech-recognition-with-decoder-centric-regularisation-in-encoder-decoder-models-2410.17437</loc><lastmod>2024-10-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-automatic-speech-recognition-with-decoder-centric-regularisation-in-encoder-decoder-models-2410.17437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-automatic-speech-recognition-with-decoder-centric-regularisation-in-encoder-decoder-models-2410.17437"/></url>
<url><loc>https://scifaro.com/en/abs/regularized-autoregressive-modeling-and-its-application-to-audio-signal-reconstruction-2410.17790</loc><lastmod>2026-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/regularized-autoregressive-modeling-and-its-application-to-audio-signal-reconstruction-2410.17790"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/regularized-autoregressive-modeling-and-its-application-to-audio-signal-reconstruction-2410.17790"/></url>
<url><loc>https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-with-diffusion-models-trained-on-clean-speech-2410.17834</loc><lastmod>2025-06-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-with-diffusion-models-trained-on-clean-speech-2410.17834"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-intrusive-speech-quality-assessment-with-diffusion-models-trained-on-clean-speech-2410.17834"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-on-speech-large-language-models-for-understanding-2410.18908</loc><lastmod>2025-12-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-on-speech-large-language-models-for-understanding-2410.18908"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-on-speech-large-language-models-for-understanding-2410.18908"/></url>
<url><loc>https://scifaro.com/en/abs/mmau-a-massive-multi-task-audio-understanding-and-reasoning-benchmark-2410.19168</loc><lastmod>2024-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mmau-a-massive-multi-task-audio-understanding-and-reasoning-benchmark-2410.19168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mmau-a-massive-multi-task-audio-understanding-and-reasoning-benchmark-2410.19168"/></url>
<url><loc>https://scifaro.com/en/abs/mask-weighted-spatial-likelihood-coding-for-speaker-independent-joint-localization-and-mask-estimation-2410.19595</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mask-weighted-spatial-likelihood-coding-for-speaker-independent-joint-localization-and-mask-estimation-2410.19595"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mask-weighted-spatial-likelihood-coding-for-speaker-independent-joint-localization-and-mask-estimation-2410.19595"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-long-term-rhythm-variations-in-mising-and-assamese-using-frequency-domain-correlates-2410.20095</loc><lastmod>2024-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-long-term-rhythm-variations-in-mising-and-assamese-using-frequency-domain-correlates-2410.20095"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-long-term-rhythm-variations-in-mising-and-assamese-using-frequency-domain-correlates-2410.20095"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-approaches-for-improving-detection-of-unseen-speech-deepfakes-2410.20578</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-approaches-for-improving-detection-of-unseen-speech-deepfakes-2410.20578"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-approaches-for-improving-detection-of-unseen-speech-deepfakes-2410.20578"/></url>
<url><loc>https://scifaro.com/en/abs/simultaneous-diarization-and-separation-of-meetings-through-the-integration-of-statistical-mixture-models-2410.21455</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/simultaneous-diarization-and-separation-of-meetings-through-the-integration-of-statistical-mixture-models-2410.21455"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/simultaneous-diarization-and-separation-of-meetings-through-the-integration-of-statistical-mixture-models-2410.21455"/></url>
<url><loc>https://scifaro.com/en/abs/a-tutorial-on-clinical-speech-ai-development-from-data-collection-to-model-validation-2410.21640</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-tutorial-on-clinical-speech-ai-development-from-data-collection-to-model-validation-2410.21640"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-tutorial-on-clinical-speech-ai-development-from-data-collection-to-model-validation-2410.21640"/></url>
<url><loc>https://scifaro.com/en/abs/representational-learning-for-an-anomalous-sound-detection-system-with-source-separation-model-2410.21797</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/representational-learning-for-an-anomalous-sound-detection-system-with-source-separation-model-2410.21797"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/representational-learning-for-an-anomalous-sound-detection-system-with-source-separation-model-2410.21797"/></url>
<url><loc>https://scifaro.com/en/abs/fast-and-high-quality-auto-regressive-speech-synthesis-via-speculative-decoding-2410.21951</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fast-and-high-quality-auto-regressive-speech-synthesis-via-speculative-decoding-2410.21951"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fast-and-high-quality-auto-regressive-speech-synthesis-via-speculative-decoding-2410.21951"/></url>
<url><loc>https://scifaro.com/en/abs/timbre-difference-capturing-in-anomalous-sound-detection-2410.22033</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/timbre-difference-capturing-in-anomalous-sound-detection-2410.22033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/timbre-difference-capturing-in-anomalous-sound-detection-2410.22033"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-approach-for-unsupervised-anomalous-sound-detection-and-captioning-without-model-training-2410.22056</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-approach-for-unsupervised-anomalous-sound-detection-and-captioning-without-model-training-2410.22056"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-approach-for-unsupervised-anomalous-sound-detection-and-captioning-without-model-training-2410.22056"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-reverberation-and-visual-depth-cues-for-sound-event-localization-and-detection-with-distance-estimation-2410.22271</loc><lastmod>2024-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-reverberation-and-visual-depth-cues-for-sound-event-localization-and-detection-with-distance-estimation-2410.22271"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-reverberation-and-visual-depth-cues-for-sound-event-localization-and-detection-with-distance-estimation-2410.22271"/></url>
<url><loc>https://scifaro.com/en/abs/a-closer-look-at-neural-codec-resynthesis-bridging-the-gap-between-codec-and-waveform-generation-2410.22448</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-closer-look-at-neural-codec-resynthesis-bridging-the-gap-between-codec-and-waveform-generation-2410.22448"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-closer-look-at-neural-codec-resynthesis-bridging-the-gap-between-codec-and-waveform-generation-2410.22448"/></url>
<url><loc>https://scifaro.com/en/abs/apcodec-a-spectrum-coding-based-high-fidelity-and-high-compression-rate-neural-audio-codec-with-staged-training-paradigm-2410.22807</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/apcodec-a-spectrum-coding-based-high-fidelity-and-high-compression-rate-neural-audio-codec-with-staged-training-paradigm-2410.22807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/apcodec-a-spectrum-coding-based-high-fidelity-and-high-compression-rate-neural-audio-codec-with-staged-training-paradigm-2410.22807"/></url>
<url><loc>https://scifaro.com/en/abs/augmenting-polish-automatic-speech-recognition-system-with-synthetic-data-2410.22903</loc><lastmod>2024-10-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/augmenting-polish-automatic-speech-recognition-system-with-synthetic-data-2410.22903"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/augmenting-polish-automatic-speech-recognition-system-with-synthetic-data-2410.22903"/></url>
<url><loc>https://scifaro.com/en/abs/audiovisual-angle-and-voice-incongruence-do-not-affect-audiovisual-verbal-short-term-memory-in-virtual-reality-2410.23015</loc><lastmod>2025-08-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiovisual-angle-and-voice-incongruence-do-not-affect-audiovisual-verbal-short-term-memory-in-virtual-reality-2410.23015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiovisual-angle-and-voice-incongruence-do-not-affect-audiovisual-verbal-short-term-memory-in-virtual-reality-2410.23015"/></url>
<url><loc>https://scifaro.com/en/abs/ddmd-ai-powered-digital-drug-music-detector-2410.23293</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ddmd-ai-powered-digital-drug-music-detector-2410.23293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ddmd-ai-powered-digital-drug-music-detector-2410.23293"/></url>
<url><loc>https://scifaro.com/en/abs/lina-speech-gated-linear-attention-and-initial-state-tuning-for-multi-sample-prompting-text-to-speech-synthesis-2410.23320</loc><lastmod>2025-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lina-speech-gated-linear-attention-and-initial-state-tuning-for-multi-sample-prompting-text-to-speech-synthesis-2410.23320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lina-speech-gated-linear-attention-and-initial-state-tuning-for-multi-sample-prompting-text-to-speech-synthesis-2410.23320"/></url>
<url><loc>https://scifaro.com/en/abs/phonology-guided-speech-to-speech-translation-for-african-languages-2410.23323</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phonology-guided-speech-to-speech-translation-for-african-languages-2410.23323"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phonology-guided-speech-to-speech-translation-for-african-languages-2410.23323"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-in-vocal-education-technical-evaluation-of-limited-samples-describing-mezzo-soprano-2410.23325</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-in-vocal-education-technical-evaluation-of-limited-samples-describing-mezzo-soprano-2410.23325"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-in-vocal-education-technical-evaluation-of-limited-samples-describing-mezzo-soprano-2410.23325"/></url>
<url><loc>https://scifaro.com/en/abs/scene-wide-acoustic-parameter-estimation-2410.23523</loc><lastmod>2025-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scene-wide-acoustic-parameter-estimation-2410.23523"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scene-wide-acoustic-parameter-estimation-2410.23523"/></url>
<url><loc>https://scifaro.com/en/abs/an-empirical-analysis-of-speech-self-supervised-learning-at-multiple-resolutions-2410.23955</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-empirical-analysis-of-speech-self-supervised-learning-at-multiple-resolutions-2410.23955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-empirical-analysis-of-speech-self-supervised-learning-at-multiple-resolutions-2410.23955"/></url>
<url><loc>https://scifaro.com/en/abs/task-aware-unified-source-separation-2410.23987</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/task-aware-unified-source-separation-2410.23987"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/task-aware-unified-source-separation-2410.23987"/></url>
<url><loc>https://scifaro.com/en/abs/cough-e-a-multimodal-privacy-preserving-cough-detection-algorithm-for-the-edge-2410.24066</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cough-e-a-multimodal-privacy-preserving-cough-detection-algorithm-for-the-edge-2410.24066"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cough-e-a-multimodal-privacy-preserving-cough-detection-algorithm-for-the-edge-2410.24066"/></url>
<url><loc>https://scifaro.com/en/abs/dc-spin-a-speaker-invariant-speech-tokenizer-for-spoken-language-models-2410.24177</loc><lastmod>2024-11-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dc-spin-a-speaker-invariant-speech-tokenizer-for-spoken-language-models-2410.24177"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dc-spin-a-speaker-invariant-speech-tokenizer-for-spoken-language-models-2410.24177"/></url>
<url><loc>https://scifaro.com/en/abs/device-directed-speech-detection-for-follow-up-conversations-using-large-language-models-2411.00023</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/device-directed-speech-detection-for-follow-up-conversations-using-large-language-models-2411.00023"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/device-directed-speech-detection-for-follow-up-conversations-using-large-language-models-2411.00023"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-contextual-speech-recognition-using-vector-quantization-for-efficient-retrieval-2411.00664</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-contextual-speech-recognition-using-vector-quantization-for-efficient-retrieval-2411.00664"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-contextual-speech-recognition-using-vector-quantization-for-efficient-retrieval-2411.00664"/></url>
<url><loc>https://scifaro.com/en/abs/sann-psz-spatially-adaptive-neural-network-for-head-tracked-personal-sound-zones-2411.00772</loc><lastmod>2024-11-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sann-psz-spatially-adaptive-neural-network-for-head-tracked-personal-sound-zones-2411.00772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sann-psz-spatially-adaptive-neural-network-for-head-tracked-personal-sound-zones-2411.00772"/></url>
<url><loc>https://scifaro.com/en/abs/an-incremental-algorithm-based-on-multichannel-non-negative-matrix-partial-co-factorization-for-ambient-denoising-in-auscultation-2411.01018</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-incremental-algorithm-based-on-multichannel-non-negative-matrix-partial-co-factorization-for-ambient-denoising-in-auscultation-2411.01018"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-incremental-algorithm-based-on-multichannel-non-negative-matrix-partial-co-factorization-for-ambient-denoising-in-auscultation-2411.01018"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-llm-and-text-queried-separation-for-noise-robust-sound-event-detection-2411.01174</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-llm-and-text-queried-separation-for-noise-robust-sound-event-detection-2411.01174"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-llm-and-text-queried-separation-for-noise-robust-sound-event-detection-2411.01174"/></url>
<url><loc>https://scifaro.com/en/abs/modulating-state-space-model-with-slowfast-framework-for-compute-efficient-ultra-low-latency-speech-enhancement-2411.02019</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modulating-state-space-model-with-slowfast-framework-for-compute-efficient-ultra-low-latency-speech-enhancement-2411.02019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modulating-state-space-model-with-slowfast-framework-for-compute-efficient-ultra-low-latency-speech-enhancement-2411.02019"/></url>
<url><loc>https://scifaro.com/en/abs/complete-reconstruction-of-the-tongue-contour-through-acoustic-to-articulatory-inversion-using-real-time-mri-data-2411.02037</loc><lastmod>2026-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complete-reconstruction-of-the-tongue-contour-through-acoustic-to-articulatory-inversion-using-real-time-mri-data-2411.02037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complete-reconstruction-of-the-tongue-contour-through-acoustic-to-articulatory-inversion-using-real-time-mri-data-2411.02037"/></url>
<url><loc>https://scifaro.com/en/abs/joint-training-of-speaker-embedding-extractor-speech-and-overlap-detection-for-diarization-2411.02165</loc><lastmod>2024-11-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-training-of-speaker-embedding-extractor-speech-and-overlap-detection-for-diarization-2411.02165"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-training-of-speaker-embedding-extractor-speech-and-overlap-detection-for-diarization-2411.02165"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-detection-and-classification-of-heartbeats-using-the-dissimilarity-matrix-in-pcg-signals-2411.03061</loc><lastmod>2024-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-detection-and-classification-of-heartbeats-using-the-dissimilarity-matrix-in-pcg-signals-2411.03061"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-detection-and-classification-of-heartbeats-using-the-dissimilarity-matrix-in-pcg-signals-2411.03061"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-hearing-aid-voice-control-2411.03150</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-hearing-aid-voice-control-2411.03150"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-hearing-aid-voice-control-2411.03150"/></url>
<url><loc>https://scifaro.com/en/abs/reference-microphone-selection-for-the-weighted-prediction-error-algorithm-using-the-normalized-l-p-norm-2411.03168</loc><lastmod>2026-02-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reference-microphone-selection-for-the-weighted-prediction-error-algorithm-using-the-normalized-l-p-norm-2411.03168"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reference-microphone-selection-for-the-weighted-prediction-error-algorithm-using-the-normalized-l-p-norm-2411.03168"/></url>
<url><loc>https://scifaro.com/en/abs/blind-estimation-of-sub-band-acoustic-parameters-from-ambisonics-recordings-using-spectro-spatial-covariance-features-2411.03172</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/blind-estimation-of-sub-band-acoustic-parameters-from-ambisonics-recordings-using-spectro-spatial-covariance-features-2411.03172"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/blind-estimation-of-sub-band-acoustic-parameters-from-ambisonics-recordings-using-spectro-spatial-covariance-features-2411.03172"/></url>
<url><loc>https://scifaro.com/en/abs/unified-pathological-speech-analysis-with-prompt-tuning-2411.04142</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unified-pathological-speech-analysis-with-prompt-tuning-2411.04142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unified-pathological-speech-analysis-with-prompt-tuning-2411.04142"/></url>
<url><loc>https://scifaro.com/en/abs/a-contrastive-self-supervised-learning-scheme-for-beat-tracking-amenable-to-few-shot-learning-2411.04152</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-contrastive-self-supervised-learning-scheme-for-beat-tracking-amenable-to-few-shot-learning-2411.04152"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-contrastive-self-supervised-learning-scheme-for-beat-tracking-amenable-to-few-shot-learning-2411.04152"/></url>
<url><loc>https://scifaro.com/en/abs/analyzing-multimodal-features-of-spontaneous-voice-assistant-commands-for-mild-cognitive-impairment-detection-2411.04158</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analyzing-multimodal-features-of-spontaneous-voice-assistant-commands-for-mild-cognitive-impairment-detection-2411.04158"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analyzing-multimodal-features-of-spontaneous-voice-assistant-commands-for-mild-cognitive-impairment-detection-2411.04158"/></url>
<url><loc>https://scifaro.com/en/abs/a-pre-training-framework-that-encodes-noise-information-for-speech-quality-assessment-2411.04379</loc><lastmod>2024-11-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-pre-training-framework-that-encodes-noise-information-for-speech-quality-assessment-2411.04379"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-pre-training-framework-that-encodes-noise-information-for-speech-quality-assessment-2411.04379"/></url>
<url><loc>https://scifaro.com/en/abs/audiobox-tta-rag-improving-zero-shot-and-few-shot-text-to-audio-with-retrieval-augmented-generation-2411.05141</loc><lastmod>2025-06-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiobox-tta-rag-improving-zero-shot-and-few-shot-text-to-audio-with-retrieval-augmented-generation-2411.05141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiobox-tta-rag-improving-zero-shot-and-few-shot-text-to-audio-with-retrieval-augmented-generation-2411.05141"/></url>
<url><loc>https://scifaro.com/en/abs/an-ambient-denoising-method-based-on-multi-channel-non-negative-matrix-factorization-for-wheezing-detection-2411.05774</loc><lastmod>2024-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-ambient-denoising-method-based-on-multi-channel-non-negative-matrix-factorization-for-wheezing-detection-2411.05774"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-ambient-denoising-method-based-on-multi-channel-non-negative-matrix-factorization-for-wheezing-detection-2411.05774"/></url>
<url><loc>https://scifaro.com/en/abs/classification-of-adventitious-sounds-combining-cochleogram-and-vision-transformers-2411.05955</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/classification-of-adventitious-sounds-combining-cochleogram-and-vision-transformers-2411.05955"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/classification-of-adventitious-sounds-combining-cochleogram-and-vision-transformers-2411.05955"/></url>
<url><loc>https://scifaro.com/en/abs/a-kalman-filter-model-for-synchronization-in-musical-ensembles-2411.05971</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-kalman-filter-model-for-synchronization-in-musical-ensembles-2411.05971"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-kalman-filter-model-for-synchronization-in-musical-ensembles-2411.05971"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-estimation-of-schizophrenia-severity-using-feature-fusion-2411.06033</loc><lastmod>2024-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-estimation-of-schizophrenia-severity-using-feature-fusion-2411.06033"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-estimation-of-schizophrenia-severity-using-feature-fusion-2411.06033"/></url>
<url><loc>https://scifaro.com/en/abs/selective-state-space-model-for-monaural-speech-enhancement-2411.06217</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/selective-state-space-model-for-monaural-speech-enhancement-2411.06217"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/selective-state-space-model-for-monaural-speech-enhancement-2411.06217"/></url>
<url><loc>https://scifaro.com/en/abs/pseldnets-pre-trained-neural-networks-on-a-large-scale-synthetic-dataset-for-sound-event-localization-and-detection-2411.06399</loc><lastmod>2025-07-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseldnets-pre-trained-neural-networks-on-a-large-scale-synthetic-dataset-for-sound-event-localization-and-detection-2411.06399"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseldnets-pre-trained-neural-networks-on-a-large-scale-synthetic-dataset-for-sound-event-localization-and-detection-2411.06399"/></url>
<url><loc>https://scifaro.com/en/abs/ctc-assisted-llm-based-contextual-asr-2411.06437</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ctc-assisted-llm-based-contextual-asr-2411.06437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ctc-assisted-llm-based-contextual-asr-2411.06437"/></url>
<url><loc>https://scifaro.com/en/abs/debatts-zero-shot-debating-text-to-speech-synthesis-2411.06540</loc><lastmod>2024-12-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/debatts-zero-shot-debating-text-to-speech-synthesis-2411.06540"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/debatts-zero-shot-debating-text-to-speech-synthesis-2411.06540"/></url>
<url><loc>https://scifaro.com/en/abs/diff-mstc-a-mixing-style-transfer-prototype-for-cubase-2411.06576</loc><lastmod>2024-11-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diff-mstc-a-mixing-style-transfer-prototype-for-cubase-2411.06576"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diff-mstc-a-mixing-style-transfer-prototype-for-cubase-2411.06576"/></url>
<url><loc>https://scifaro.com/en/abs/dcf-ds-deep-cascade-fusion-of-diarization-and-separation-for-speech-recognition-under-realistic-single-channel-conditions-2411.06667</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dcf-ds-deep-cascade-fusion-of-diarization-and-separation-for-speech-recognition-under-realistic-single-channel-conditions-2411.06667"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dcf-ds-deep-cascade-fusion-of-diarization-and-separation-for-speech-recognition-under-realistic-single-channel-conditions-2411.06667"/></url>
<url><loc>https://scifaro.com/en/abs/aeromamba-an-efficient-architecture-for-audio-super-resolution-using-generative-adversarial-networks-and-state-space-models-2411.07364</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aeromamba-an-efficient-architecture-for-audio-super-resolution-using-generative-adversarial-networks-and-state-space-models-2411.07364"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aeromamba-an-efficient-architecture-for-audio-super-resolution-using-generative-adversarial-networks-and-state-space-models-2411.07364"/></url>
<url><loc>https://scifaro.com/en/abs/cjst-ctc-compressor-based-joint-speech-and-text-training-for-decoder-only-asr-2411.07607</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cjst-ctc-compressor-based-joint-speech-and-text-training-for-decoder-only-asr-2411.07607"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cjst-ctc-compressor-based-joint-speech-and-text-training-for-decoder-only-asr-2411.07607"/></url>
<url><loc>https://scifaro.com/en/abs/study-on-inter-and-intra-speaker-variability-in-speaker-recognition-2411.07754</loc><lastmod>2024-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/study-on-inter-and-intra-speaker-variability-in-speaker-recognition-2411.07754"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/study-on-inter-and-intra-speaker-variability-in-speaker-recognition-2411.07754"/></url>
<url><loc>https://scifaro.com/en/abs/state-space-estimation-of-spatially-dynamic-room-impulse-responses-using-a-room-acoustic-model-based-prior-2411.08477</loc><lastmod>2024-11-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/state-space-estimation-of-spatially-dynamic-room-impulse-responses-using-a-room-acoustic-model-based-prior-2411.08477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/state-space-estimation-of-spatially-dynamic-room-impulse-responses-using-a-room-acoustic-model-based-prior-2411.08477"/></url>
<url><loc>https://scifaro.com/en/abs/transferable-adversarial-attacks-against-asr-2411.09220</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transferable-adversarial-attacks-against-asr-2411.09220"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transferable-adversarial-attacks-against-asr-2411.09220"/></url>
<url><loc>https://scifaro.com/en/abs/an-end-to-end-stuttering-detection-method-based-on-conformer-and-bilstm-2411.09479</loc><lastmod>2024-11-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-end-to-end-stuttering-detection-method-based-on-conformer-and-bilstm-2411.09479"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-end-to-end-stuttering-detection-method-based-on-conformer-and-bilstm-2411.09479"/></url>
<url><loc>https://scifaro.com/en/abs/xlsr-mamba-a-dual-column-bidirectional-state-space-model-for-spoofing-attack-detection-2411.10027</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/xlsr-mamba-a-dual-column-bidirectional-state-space-model-for-spoofing-attack-detection-2411.10027"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/xlsr-mamba-a-dual-column-bidirectional-state-space-model-for-spoofing-attack-detection-2411.10027"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-implications-of-simplifying-geometrical-acoustics-models-for-ambisonics-based-binaural-reverberation-2411.10375</loc><lastmod>2024-11-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-implications-of-simplifying-geometrical-acoustics-models-for-ambisonics-based-binaural-reverberation-2411.10375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-implications-of-simplifying-geometrical-acoustics-models-for-ambisonics-based-binaural-reverberation-2411.10375"/></url>
<url><loc>https://scifaro.com/en/abs/memory-efficient-training-for-text-dependent-sv-with-independent-pre-trained-models-2411.10828</loc><lastmod>2026-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-efficient-training-for-text-dependent-sv-with-independent-pre-trained-models-2411.10828"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-efficient-training-for-text-dependent-sv-with-independent-pre-trained-models-2411.10828"/></url>
<url><loc>https://scifaro.com/en/abs/explainable-dnn-based-beamformer-with-postfilter-2411.10854</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/explainable-dnn-based-beamformer-with-postfilter-2411.10854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/explainable-dnn-based-beamformer-with-postfilter-2411.10854"/></url>
<url><loc>https://scifaro.com/en/abs/uncovering-the-role-of-semantic-and-acoustic-cues-in-normal-and-dichotic-listening-2411.11308</loc><lastmod>2025-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncovering-the-role-of-semantic-and-acoustic-cues-in-normal-and-dichotic-listening-2411.11308"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncovering-the-role-of-semantic-and-acoustic-cues-in-normal-and-dichotic-listening-2411.11308"/></url>
<url><loc>https://scifaro.com/en/abs/an-investigation-of-reprogramming-for-cross-language-adaptation-in-speaker-verification-systems-2411.11353</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-investigation-of-reprogramming-for-cross-language-adaptation-in-speaker-verification-systems-2411.11353"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-investigation-of-reprogramming-for-cross-language-adaptation-in-speaker-verification-systems-2411.11353"/></url>
<url><loc>https://scifaro.com/en/abs/a-bandpass-twin-t-active-filter-used-in-the-buchla-200-electric-music-box-synthesizer-2411.11358</loc><lastmod>2024-11-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-bandpass-twin-t-active-filter-used-in-the-buchla-200-electric-music-box-synthesizer-2411.11358"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-bandpass-twin-t-active-filter-used-in-the-buchla-200-electric-music-box-synthesizer-2411.11358"/></url>
<url><loc>https://scifaro.com/en/abs/a-neural-denoising-vocoder-for-clean-waveform-generation-from-noisy-mel-spectrogram-based-on-amplitude-and-phase-predictions-2411.12268</loc><lastmod>2024-11-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-neural-denoising-vocoder-for-clean-waveform-generation-from-noisy-mel-spectrogram-based-on-amplitude-and-phase-predictions-2411.12268"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-neural-denoising-vocoder-for-clean-waveform-generation-from-noisy-mel-spectrogram-based-on-amplitude-and-phase-predictions-2411.12268"/></url>
<url><loc>https://scifaro.com/en/abs/class-incremental-learning-for-sound-event-localization-and-detection-2411.12830</loc><lastmod>2024-11-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/class-incremental-learning-for-sound-event-localization-and-detection-2411.12830"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/class-incremental-learning-for-sound-event-localization-and-detection-2411.12830"/></url>
<url><loc>https://scifaro.com/en/abs/wavchat-a-survey-of-spoken-dialogue-models-2411.13577</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavchat-a-survey-of-spoken-dialogue-models-2411.13577"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavchat-a-survey-of-spoken-dialogue-models-2411.13577"/></url>
<url><loc>https://scifaro.com/en/abs/sequence-to-sequence-neural-diarization-with-automatic-speaker-detection-and-representation-2411.13849</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sequence-to-sequence-neural-diarization-with-automatic-speaker-detection-and-representation-2411.13849"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sequence-to-sequence-neural-diarization-with-automatic-speaker-detection-and-representation-2411.13849"/></url>
<url><loc>https://scifaro.com/en/abs/lightweight-model-attribution-and-detection-of-synthetic-speech-via-audio-residual-fingerprints-2411.14013</loc><lastmod>2025-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lightweight-model-attribution-and-detection-of-synthetic-speech-via-audio-residual-fingerprints-2411.14013"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lightweight-model-attribution-and-detection-of-synthetic-speech-via-audio-residual-fingerprints-2411.14013"/></url>
<url><loc>https://scifaro.com/en/abs/best-std-bidirectional-mamba-enhanced-speech-tokenization-for-spoken-term-detection-2411.14100</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/best-std-bidirectional-mamba-enhanced-speech-tokenization-for-spoken-term-detection-2411.14100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/best-std-bidirectional-mamba-enhanced-speech-tokenization-for-spoken-term-detection-2411.14100"/></url>
<url><loc>https://scifaro.com/en/abs/mvanet-multi-stage-video-attention-network-for-sound-event-localization-and-detection-with-source-distance-estimation-2411.14153</loc><lastmod>2024-11-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mvanet-multi-stage-video-attention-network-for-sound-event-localization-and-detection-with-source-distance-estimation-2411.14153"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mvanet-multi-stage-video-attention-network-for-sound-event-localization-and-detection-with-source-distance-estimation-2411.14153"/></url>
<url><loc>https://scifaro.com/en/abs/open-amp-synthetic-data-framework-for-audio-effect-foundation-models-2411.14972</loc><lastmod>2024-11-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/open-amp-synthetic-data-framework-for-audio-effect-foundation-models-2411.14972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/open-amp-synthetic-data-framework-for-audio-effect-foundation-models-2411.14972"/></url>
<url><loc>https://scifaro.com/en/abs/state-space-large-audio-language-models-2411.15685</loc><lastmod>2024-11-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/state-space-large-audio-language-models-2411.15685"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/state-space-large-audio-language-models-2411.15685"/></url>
<url><loc>https://scifaro.com/en/abs/k2ssl-a-faster-and-better-framework-for-self-supervised-speech-representation-learning-2411.17100</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/k2ssl-a-faster-and-better-framework-for-self-supervised-speech-representation-learning-2411.17100"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/k2ssl-a-faster-and-better-framework-for-self-supervised-speech-representation-learning-2411.17100"/></url>
<url><loc>https://scifaro.com/en/abs/typical-vs-atypical-disfluency-classification-introducing-the-iiith-tisa-corpus-and-temporal-context-based-feature-representations-2411.17149</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/typical-vs-atypical-disfluency-classification-introducing-the-iiith-tisa-corpus-and-temporal-context-based-feature-representations-2411.17149"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/typical-vs-atypical-disfluency-classification-introducing-the-iiith-tisa-corpus-and-temporal-context-based-feature-representations-2411.17149"/></url>
<url><loc>https://scifaro.com/en/abs/towards-maximum-likelihood-training-for-transducer-based-streaming-speech-recognition-2411.17537</loc><lastmod>2024-11-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-maximum-likelihood-training-for-transducer-based-streaming-speech-recognition-2411.17537"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-maximum-likelihood-training-for-transducer-based-streaming-speech-recognition-2411.17537"/></url>
<url><loc>https://scifaro.com/en/abs/disentangled-transformer-an-explainable-end-to-end-automatic-speech-recognition-model-with-speech-content-context-separation-2411.17846</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disentangled-transformer-an-explainable-end-to-end-automatic-speech-recognition-model-with-speech-content-context-separation-2411.17846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disentangled-transformer-an-explainable-end-to-end-automatic-speech-recognition-model-with-speech-content-context-separation-2411.17846"/></url>
<url><loc>https://scifaro.com/en/abs/speech-separation-using-neural-audio-codecs-with-embedding-loss-2411.17998</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-separation-using-neural-audio-codecs-with-embedding-loss-2411.17998"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-separation-using-neural-audio-codecs-with-embedding-loss-2411.17998"/></url>
<url><loc>https://scifaro.com/en/abs/jppo-joint-power-and-prompt-optimization-for-accelerated-large-language-model-services-2411.18010</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jppo-joint-power-and-prompt-optimization-for-accelerated-large-language-model-services-2411.18010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jppo-joint-power-and-prompt-optimization-for-accelerated-large-language-model-services-2411.18010"/></url>
<url><loc>https://scifaro.com/en/abs/salmonn-omni-a-codec-free-llm-for-full-duplex-speech-understanding-and-generation-2411.18138</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/salmonn-omni-a-codec-free-llm-for-full-duplex-speech-understanding-and-generation-2411.18138"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/salmonn-omni-a-codec-free-llm-for-full-duplex-speech-understanding-and-generation-2411.18138"/></url>
<url><loc>https://scifaro.com/en/abs/towards-improved-objective-perceptual-audio-quality-assessment-part-1-a-novel-data-driven-cognitive-model-2411.18222</loc><lastmod>2024-11-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-improved-objective-perceptual-audio-quality-assessment-part-1-a-novel-data-driven-cognitive-model-2411.18222"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-improved-objective-perceptual-audio-quality-assessment-part-1-a-novel-data-driven-cognitive-model-2411.18222"/></url>
<url><loc>https://scifaro.com/en/abs/wearable-intelligent-throat-enables-natural-speech-in-stroke-patients-with-dysarthria-2411.18266</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wearable-intelligent-throat-enables-natural-speech-in-stroke-patients-with-dysarthria-2411.18266"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wearable-intelligent-throat-enables-natural-speech-in-stroke-patients-with-dysarthria-2411.18266"/></url>
<url><loc>https://scifaro.com/en/abs/identification-and-clustering-of-unseen-ragas-in-indian-art-music-2411.18611</loc><lastmod>2025-07-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/identification-and-clustering-of-unseen-ragas-in-indian-art-music-2411.18611"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/identification-and-clustering-of-unseen-ragas-in-indian-art-music-2411.18611"/></url>
<url><loc>https://scifaro.com/en/abs/ts3-codec-transformer-based-simple-streaming-single-codec-2411.18803</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ts3-codec-transformer-based-simple-streaming-single-codec-2411.18803"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ts3-codec-transformer-based-simple-streaming-single-codec-2411.18803"/></url>
<url><loc>https://scifaro.com/en/abs/audiosetcaps-an-enriched-audio-caption-dataset-using-automated-generation-pipeline-with-large-audio-and-language-models-2411.18953</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiosetcaps-an-enriched-audio-caption-dataset-using-automated-generation-pipeline-with-large-audio-and-language-models-2411.18953"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiosetcaps-an-enriched-audio-caption-dataset-using-automated-generation-pipeline-with-large-audio-and-language-models-2411.18953"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-transformers-for-low-bitrate-high-quality-speech-coding-2411.19842</loc><lastmod>2024-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-transformers-for-low-bitrate-high-quality-speech-coding-2411.19842"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-transformers-for-low-bitrate-high-quality-speech-coding-2411.19842"/></url>
<url><loc>https://scifaro.com/en/abs/a-context-based-numerical-format-prediction-for-a-text-to-speech-system-2412.00028</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-context-based-numerical-format-prediction-for-a-text-to-speech-system-2412.00028"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-context-based-numerical-format-prediction-for-a-text-to-speech-system-2412.00028"/></url>
<url><loc>https://scifaro.com/en/abs/high-precision-medical-speech-recognition-through-synthetic-data-and-semantic-correction-united-medasr-2412.00055</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/high-precision-medical-speech-recognition-through-synthetic-data-and-semantic-correction-united-medasr-2412.00055"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/high-precision-medical-speech-recognition-through-synthetic-data-and-semantic-correction-united-medasr-2412.00055"/></url>
<url><loc>https://scifaro.com/en/abs/feasibility-of-mental-health-triage-call-priority-prediction-using-machine-learning-2412.00057</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/feasibility-of-mental-health-triage-call-priority-prediction-using-machine-learning-2412.00057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/feasibility-of-mental-health-triage-call-priority-prediction-using-machine-learning-2412.00057"/></url>
<url><loc>https://scifaro.com/en/abs/ssdm-2-0-time-accurate-speech-rich-transcription-with-non-fluencies-2412.00265</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ssdm-2-0-time-accurate-speech-rich-transcription-with-non-fluencies-2412.00265"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ssdm-2-0-time-accurate-speech-rich-transcription-with-non-fluencies-2412.00265"/></url>
<url><loc>https://scifaro.com/en/abs/automating-feedback-analysis-in-surgical-training-detection-categorization-and-assessment-2412.00760</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automating-feedback-analysis-in-surgical-training-detection-categorization-and-assessment-2412.00760"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automating-feedback-analysis-in-surgical-training-detection-categorization-and-assessment-2412.00760"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-spoof-voices-in-asian-non-native-speech-an-indonesian-and-thai-case-study-2412.01040</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-spoof-voices-in-asian-non-native-speech-an-indonesian-and-thai-case-study-2412.01040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-spoof-voices-in-asian-non-native-speech-an-indonesian-and-thai-case-study-2412.01040"/></url>
<url><loc>https://scifaro.com/en/abs/deep-learning-based-approach-for-identification-and-compensation-of-nonlinear-distortions-in-parametric-array-loudspeakers-2412.01092</loc><lastmod>2025-07-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-learning-based-approach-for-identification-and-compensation-of-nonlinear-distortions-in-parametric-array-loudspeakers-2412.01092"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-learning-based-approach-for-identification-and-compensation-of-nonlinear-distortions-in-parametric-array-loudspeakers-2412.01092"/></url>
<url><loc>https://scifaro.com/en/abs/alignformer-modality-matching-can-achieve-better-zero-shot-instruction-following-speech-llm-2412.01145</loc><lastmod>2025-07-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/alignformer-modality-matching-can-achieve-better-zero-shot-instruction-following-speech-llm-2412.01145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/alignformer-modality-matching-can-achieve-better-zero-shot-instruction-following-speech-llm-2412.01145"/></url>
<url><loc>https://scifaro.com/en/abs/memory-efficient-training-for-deep-speaker-embedding-learning-in-speaker-verification-2412.01195</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/memory-efficient-training-for-deep-speaker-embedding-learning-in-speaker-verification-2412.01195"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/memory-efficient-training-for-deep-speaker-embedding-learning-in-speaker-verification-2412.01195"/></url>
<url><loc>https://scifaro.com/en/abs/text-based-audio-retrieval-by-learning-from-similarities-between-audio-captions-2412.01356</loc><lastmod>2024-12-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-based-audio-retrieval-by-learning-from-similarities-between-audio-captions-2412.01356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-based-audio-retrieval-by-learning-from-similarities-between-audio-captions-2412.01356"/></url>
<url><loc>https://scifaro.com/en/abs/taco-training-free-sound-prompted-segmentation-via-semantically-constrained-audio-visual-co-factorization-2412.01488</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/taco-training-free-sound-prompted-segmentation-via-semantically-constrained-audio-visual-co-factorization-2412.01488"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/taco-training-free-sound-prompted-segmentation-via-semantically-constrained-audio-visual-co-factorization-2412.01488"/></url>
<url><loc>https://scifaro.com/en/abs/late-fusion-ensembles-for-speech-recognition-on-diverse-input-audio-representations-2412.01861</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/late-fusion-ensembles-for-speech-recognition-on-diverse-input-audio-representations-2412.01861"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/late-fusion-ensembles-for-speech-recognition-on-diverse-input-audio-representations-2412.01861"/></url>
<url><loc>https://scifaro.com/en/abs/a-machine-hearing-system-for-robust-cough-detection-based-on-a-high-level-representation-of-band-specific-audio-features-2412.01996</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-machine-hearing-system-for-robust-cough-detection-based-on-a-high-level-representation-of-band-specific-audio-features-2412.01996"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-machine-hearing-system-for-robust-cough-detection-based-on-a-high-level-representation-of-band-specific-audio-features-2412.01996"/></url>
<url><loc>https://scifaro.com/en/abs/a-theoretical-framework-for-acoustic-neighbor-embeddings-2412.02164</loc><lastmod>2024-12-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-theoretical-framework-for-acoustic-neighbor-embeddings-2412.02164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-theoretical-framework-for-acoustic-neighbor-embeddings-2412.02164"/></url>
<url><loc>https://scifaro.com/en/abs/comprehensive-audio-query-handling-system-with-integrated-expert-models-and-contextual-understanding-2412.03980</loc><lastmod>2024-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comprehensive-audio-query-handling-system-with-integrated-expert-models-and-contextual-understanding-2412.03980"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comprehensive-audio-query-handling-system-with-integrated-expert-models-and-contextual-understanding-2412.03980"/></url>
<url><loc>https://scifaro.com/en/abs/integrated-minimum-mean-squared-error-algorithms-for-combined-acoustic-echo-cancellation-and-noise-reduction-2412.04267</loc><lastmod>2026-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrated-minimum-mean-squared-error-algorithms-for-combined-acoustic-echo-cancellation-and-noise-reduction-2412.04267"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrated-minimum-mean-squared-error-algorithms-for-combined-acoustic-echo-cancellation-and-noise-reduction-2412.04267"/></url>
<url><loc>https://scifaro.com/en/abs/ca-sslr-condition-aware-self-supervised-learning-representation-for-generalized-speech-processing-2412.04425</loc><lastmod>2024-12-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ca-sslr-condition-aware-self-supervised-learning-representation-for-generalized-speech-processing-2412.04425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ca-sslr-condition-aware-self-supervised-learning-representation-for-generalized-speech-processing-2412.04425"/></url>
<url><loc>https://scifaro.com/en/abs/stablevc-style-controllable-zero-shot-voice-conversion-with-conditional-flow-matching-2412.04724</loc><lastmod>2024-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stablevc-style-controllable-zero-shot-voice-conversion-with-conditional-flow-matching-2412.04724"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stablevc-style-controllable-zero-shot-voice-conversion-with-conditional-flow-matching-2412.04724"/></url>
<url><loc>https://scifaro.com/en/abs/perceptually-transparent-binaural-auralization-of-simulated-sound-fields-2412.05015</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptually-transparent-binaural-auralization-of-simulated-sound-fields-2412.05015"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptually-transparent-binaural-auralization-of-simulated-sound-fields-2412.05015"/></url>
<url><loc>https://scifaro.com/en/abs/sq-whisper-speaker-querying-based-whisper-model-for-target-speaker-asr-2412.05589</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sq-whisper-speaker-querying-based-whisper-model-for-target-speaker-asr-2412.05589"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sq-whisper-speaker-querying-based-whisper-model-for-target-speaker-asr-2412.05589"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-prompt-learning-and-pause-encoding-for-alzheimer-s-disease-detection-2412.06259</loc><lastmod>2024-12-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-prompt-learning-and-pause-encoding-for-alzheimer-s-disease-detection-2412.06259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-prompt-learning-and-pause-encoding-for-alzheimer-s-disease-detection-2412.06259"/></url>
<url><loc>https://scifaro.com/en/abs/spatio-temporal-latent-representations-for-the-analysis-of-acoustic-scenes-in-the-wild-2412.07648</loc><lastmod>2024-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatio-temporal-latent-representations-for-the-analysis-of-acoustic-scenes-in-the-wild-2412.07648"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatio-temporal-latent-representations-for-the-analysis-of-acoustic-scenes-in-the-wild-2412.07648"/></url>
<url><loc>https://scifaro.com/en/abs/evaluating-the-impact-of-discriminative-and-generative-e2e-speech-enhancement-models-on-syllable-stress-preservation-2412.08306</loc><lastmod>2024-12-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/evaluating-the-impact-of-discriminative-and-generative-e2e-speech-enhancement-models-on-syllable-stress-preservation-2412.08306"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/evaluating-the-impact-of-discriminative-and-generative-e2e-speech-enhancement-models-on-syllable-stress-preservation-2412.08306"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-code-switching-asr-leveraging-non-peaky-ctc-loss-and-deep-language-posterior-injection-2412.08651</loc><lastmod>2024-12-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-code-switching-asr-leveraging-non-peaky-ctc-loss-and-deep-language-posterior-injection-2412.08651"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-code-switching-asr-leveraging-non-peaky-ctc-loss-and-deep-language-posterior-injection-2412.08651"/></url>
<url><loc>https://scifaro.com/en/abs/cssinger-end-to-end-chunkwise-streaming-singing-voice-synthesis-system-based-on-conditional-variational-autoencoder-2412.08918</loc><lastmod>2024-12-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cssinger-end-to-end-chunkwise-streaming-singing-voice-synthesis-system-based-on-conditional-variational-autoencoder-2412.08918"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cssinger-end-to-end-chunkwise-streaming-singing-voice-synthesis-system-based-on-conditional-variational-autoencoder-2412.08918"/></url>
<url><loc>https://scifaro.com/en/abs/csl-l2m-controllable-song-level-lyric-to-melody-generation-based-on-conditional-transformer-with-fine-grained-lyric-and-musical-controls-2412.09887</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/csl-l2m-controllable-song-level-lyric-to-melody-generation-based-on-conditional-transformer-with-fine-grained-lyric-and-musical-controls-2412.09887"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/csl-l2m-controllable-song-level-lyric-to-melody-generation-based-on-conditional-transformer-with-fine-grained-lyric-and-musical-controls-2412.09887"/></url>
<url><loc>https://scifaro.com/en/abs/masv-speaker-verification-with-global-and-local-context-mamba-2412.10989</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/masv-speaker-verification-with-global-and-local-context-mamba-2412.10989"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/masv-speaker-verification-with-global-and-local-context-mamba-2412.10989"/></url>
<url><loc>https://scifaro.com/en/abs/transliterated-zero-shot-domain-adaptation-for-automatic-speech-recognition-2412.11185</loc><lastmod>2024-12-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transliterated-zero-shot-domain-adaptation-for-automatic-speech-recognition-2412.11185"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transliterated-zero-shot-domain-adaptation-for-automatic-speech-recognition-2412.11185"/></url>
<url><loc>https://scifaro.com/en/abs/a-lightweight-and-robust-method-for-blind-wideband-to-fullband-extension-of-speech-2412.11392</loc><lastmod>2025-07-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-lightweight-and-robust-method-for-blind-wideband-to-fullband-extension-of-speech-2412.11392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-lightweight-and-robust-method-for-blind-wideband-to-fullband-extension-of-speech-2412.11392"/></url>
<url><loc>https://scifaro.com/en/abs/speechprune-context-aware-token-pruning-for-speech-information-retrieval-2412.12009</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speechprune-context-aware-token-pruning-for-speech-information-retrieval-2412.12009"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speechprune-context-aware-token-pruning-for-speech-information-retrieval-2412.12009"/></url>
<url><loc>https://scifaro.com/en/abs/ntc-kws-noise-aware-ctc-for-robust-keyword-spotting-2412.12614</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ntc-kws-noise-aware-ctc-for-robust-keyword-spotting-2412.12614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ntc-kws-noise-aware-ctc-for-robust-keyword-spotting-2412.12614"/></url>
<url><loc>https://scifaro.com/en/abs/streaming-keyword-spotting-boosted-by-cross-layer-discrimination-consistency-2412.12635</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/streaming-keyword-spotting-boosted-by-cross-layer-discrimination-consistency-2412.12635"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/streaming-keyword-spotting-boosted-by-cross-layer-discrimination-consistency-2412.12635"/></url>
<url><loc>https://scifaro.com/en/abs/deep-speech-synthesis-from-multimodal-articulatory-representations-2412.13387</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-multimodal-articulatory-representations-2412.13387"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-speech-synthesis-from-multimodal-articulatory-representations-2412.13387"/></url>
<url><loc>https://scifaro.com/en/abs/songeditor-adapting-zero-shot-song-generation-language-model-as-a-multi-task-editor-2412.13786</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songeditor-adapting-zero-shot-song-generation-language-model-as-a-multi-task-editor-2412.13786"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songeditor-adapting-zero-shot-song-generation-language-model-as-a-multi-task-editor-2412.13786"/></url>
<url><loc>https://scifaro.com/en/abs/speech-watermarking-with-discrete-intermediate-representations-2412.13917</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-watermarking-with-discrete-intermediate-representations-2412.13917"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-watermarking-with-discrete-intermediate-representations-2412.13917"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-the-effects-of-diffusion-based-conditional-generative-speech-models-used-for-speech-enhancement-on-dysarthric-speech-2412.13933</loc><lastmod>2024-12-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-the-effects-of-diffusion-based-conditional-generative-speech-models-used-for-speech-enhancement-on-dysarthric-speech-2412.13933"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-the-effects-of-diffusion-based-conditional-generative-speech-models-used-for-speech-enhancement-on-dysarthric-speech-2412.13933"/></url>
<url><loc>https://scifaro.com/en/abs/scale-this-not-that-investigating-key-dataset-attributes-for-efficient-speech-enhancement-scaling-2412.14890</loc><lastmod>2024-12-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scale-this-not-that-investigating-key-dataset-attributes-for-efficient-speech-enhancement-scaling-2412.14890"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scale-this-not-that-investigating-key-dataset-attributes-for-efficient-speech-enhancement-scaling-2412.14890"/></url>
<url><loc>https://scifaro.com/en/abs/transcribing-and-translating-fast-and-slow-joint-speech-translation-and-recognition-2412.15415</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transcribing-and-translating-fast-and-slow-joint-speech-translation-and-recognition-2412.15415"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transcribing-and-translating-fast-and-slow-joint-speech-translation-and-recognition-2412.15415"/></url>
<url><loc>https://scifaro.com/en/abs/touchasp-elastic-automatic-speech-perception-that-everyone-can-touch-2412.15622</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/touchasp-elastic-automatic-speech-perception-that-everyone-can-touch-2412.15622"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/touchasp-elastic-automatic-speech-perception-that-everyone-can-touch-2412.15622"/></url>
<url><loc>https://scifaro.com/en/abs/slam-omni-timbre-controllable-voice-interaction-system-with-single-stage-training-2412.15649</loc><lastmod>2024-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slam-omni-timbre-controllable-voice-interaction-system-with-single-stage-training-2412.15649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slam-omni-timbre-controllable-voice-interaction-system-with-single-stage-training-2412.15649"/></url>
<url><loc>https://scifaro.com/en/abs/interleaved-speech-text-language-models-for-simple-streaming-text-to-speech-synthesis-2412.16102</loc><lastmod>2025-08-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/interleaved-speech-text-language-models-for-simple-streaming-text-to-speech-synthesis-2412.16102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/interleaved-speech-text-language-models-for-simple-streaming-text-to-speech-synthesis-2412.16102"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-multilingual-asr-for-unseen-languages-via-language-embedding-modeling-2412.16474</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-multilingual-asr-for-unseen-languages-via-language-embedding-modeling-2412.16474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-multilingual-asr-for-unseen-languages-via-language-embedding-modeling-2412.16474"/></url>
<url><loc>https://scifaro.com/en/abs/speech-retrieval-augmented-generation-without-automatic-speech-recognition-2412.16500</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-retrieval-augmented-generation-without-automatic-speech-recognition-2412.16500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-retrieval-augmented-generation-without-automatic-speech-recognition-2412.16500"/></url>
<url><loc>https://scifaro.com/en/abs/time-graph-frequency-representation-with-singular-value-decomposition-for-neural-speech-enhancement-2412.16823</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/time-graph-frequency-representation-with-singular-value-decomposition-for-neural-speech-enhancement-2412.16823"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/time-graph-frequency-representation-with-singular-value-decomposition-for-neural-speech-enhancement-2412.16823"/></url>
<url><loc>https://scifaro.com/en/abs/kall-e-autoregressive-speech-synthesis-with-next-distribution-prediction-2412.16846</loc><lastmod>2025-09-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kall-e-autoregressive-speech-synthesis-with-next-distribution-prediction-2412.16846"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kall-e-autoregressive-speech-synthesis-with-next-distribution-prediction-2412.16846"/></url>
<url><loc>https://scifaro.com/en/abs/speech-based-depression-prediction-using-encoder-weight-only-transfer-learning-and-a-large-corpus-2412.16900</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-based-depression-prediction-using-encoder-weight-only-transfer-learning-and-a-large-corpus-2412.16900"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-based-depression-prediction-using-encoder-weight-only-transfer-learning-and-a-large-corpus-2412.16900"/></url>
<url><loc>https://scifaro.com/en/abs/incremental-disentanglement-for-environment-aware-zero-shot-text-to-speech-synthesis-2412.16977</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/incremental-disentanglement-for-environment-aware-zero-shot-text-to-speech-synthesis-2412.16977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/incremental-disentanglement-for-environment-aware-zero-shot-text-to-speech-synthesis-2412.16977"/></url>
<url><loc>https://scifaro.com/en/abs/why-do-speech-language-models-fail-to-generate-semantically-coherent-outputs-a-modality-evolving-perspective-2412.17048</loc><lastmod>2026-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-do-speech-language-models-fail-to-generate-semantically-coherent-outputs-a-modality-evolving-perspective-2412.17048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-do-speech-language-models-fail-to-generate-semantically-coherent-outputs-a-modality-evolving-perspective-2412.17048"/></url>
<url><loc>https://scifaro.com/en/abs/scalable-speech-enhancement-with-dynamic-channel-pruning-2412.17121</loc><lastmod>2025-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scalable-speech-enhancement-with-dynamic-channel-pruning-2412.17121"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scalable-speech-enhancement-with-dynamic-channel-pruning-2412.17121"/></url>
<url><loc>https://scifaro.com/en/abs/uncovering-the-visual-contribution-in-audio-visual-speech-recognition-2412.17129</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncovering-the-visual-contribution-in-audio-visual-speech-recognition-2412.17129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncovering-the-visual-contribution-in-audio-visual-speech-recognition-2412.17129"/></url>
<url><loc>https://scifaro.com/en/abs/tandem-spoofing-robust-automatic-speaker-verification-based-on-time-domain-embeddings-2412.17133</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tandem-spoofing-robust-automatic-speaker-verification-based-on-time-domain-embeddings-2412.17133"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tandem-spoofing-robust-automatic-speaker-verification-based-on-time-domain-embeddings-2412.17133"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-of-speech-temporal-dynamics-in-the-context-of-speaker-verification-and-voice-anonymization-2412.17164</loc><lastmod>2025-04-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-of-speech-temporal-dynamics-in-the-context-of-speaker-verification-and-voice-anonymization-2412.17164"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-of-speech-temporal-dynamics-in-the-context-of-speaker-verification-and-voice-anonymization-2412.17164"/></url>
<url><loc>https://scifaro.com/en/abs/domain-incremental-learning-for-audio-classification-2412.17424</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/domain-incremental-learning-for-audio-classification-2412.17424"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/domain-incremental-learning-for-audio-classification-2412.17424"/></url>
<url><loc>https://scifaro.com/en/abs/ume-upcycling-mixture-of-experts-for-scalable-and-efficient-automatic-speech-recognition-2412.17507</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ume-upcycling-mixture-of-experts-for-scalable-and-efficient-automatic-speech-recognition-2412.17507"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ume-upcycling-mixture-of-experts-for-scalable-and-efficient-automatic-speech-recognition-2412.17507"/></url>
<url><loc>https://scifaro.com/en/abs/from-kan-to-gr-kan-advancing-speech-enhancement-with-kan-based-methodology-2412.17778</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-kan-to-gr-kan-advancing-speech-enhancement-with-kan-based-methodology-2412.17778"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-kan-to-gr-kan-advancing-speech-enhancement-with-kan-based-methodology-2412.17778"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-prosodic-signatures-via-speech-pre-trained-models-for-audio-deepfake-source-attribution-2412.17796</loc><lastmod>2024-12-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-prosodic-signatures-via-speech-pre-trained-models-for-audio-deepfake-source-attribution-2412.17796"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-prosodic-signatures-via-speech-pre-trained-models-for-audio-deepfake-source-attribution-2412.17796"/></url>
<url><loc>https://scifaro.com/en/abs/songglm-lyric-to-melody-generation-with-2d-alignment-encoding-and-multi-task-pre-training-2412.18107</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songglm-lyric-to-melody-generation-with-2d-alignment-encoding-and-multi-task-pre-training-2412.18107"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songglm-lyric-to-melody-generation-with-2d-alignment-encoding-and-multi-task-pre-training-2412.18107"/></url>
<url><loc>https://scifaro.com/en/abs/neural-directed-speech-enhancement-with-dual-microphone-array-in-high-noise-scenario-2412.18141</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-directed-speech-enhancement-with-dual-microphone-array-in-high-noise-scenario-2412.18141"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-directed-speech-enhancement-with-dual-microphone-array-in-high-noise-scenario-2412.18141"/></url>
<url><loc>https://scifaro.com/en/abs/text-aware-adapter-for-few-shot-keyword-spotting-2412.18142</loc><lastmod>2025-05-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/text-aware-adapter-for-few-shot-keyword-spotting-2412.18142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/text-aware-adapter-for-few-shot-keyword-spotting-2412.18142"/></url>
<url><loc>https://scifaro.com/en/abs/a-zero-shot-physics-informed-dictionary-learning-approach-for-sound-field-reconstruction-2412.18348</loc><lastmod>2024-12-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-zero-shot-physics-informed-dictionary-learning-approach-for-sound-field-reconstruction-2412.18348"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-zero-shot-physics-informed-dictionary-learning-approach-for-sound-field-reconstruction-2412.18348"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-acoustic-textual-emotional-inconsistency-information-for-automatic-depression-detection-2412.18614</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-acoustic-textual-emotional-inconsistency-information-for-automatic-depression-detection-2412.18614"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-acoustic-textual-emotional-inconsistency-information-for-automatic-depression-detection-2412.18614"/></url>
<url><loc>https://scifaro.com/en/abs/zema-dataset-a-comprehensive-study-of-yaredawi-zema-with-a-focus-on-horologium-chants-2412.18784</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zema-dataset-a-comprehensive-study-of-yaredawi-zema-with-a-focus-on-horologium-chants-2412.18784"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zema-dataset-a-comprehensive-study-of-yaredawi-zema-with-a-focus-on-horologium-chants-2412.18784"/></url>
<url><loc>https://scifaro.com/en/abs/computational-analysis-of-yaredawi-yezema-silt-in-ethiopian-orthodox-tewahedo-church-chants-2412.18788</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/computational-analysis-of-yaredawi-yezema-silt-in-ethiopian-orthodox-tewahedo-church-chants-2412.18788"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/computational-analysis-of-yaredawi-yezema-silt-in-ethiopian-orthodox-tewahedo-church-chants-2412.18788"/></url>
<url><loc>https://scifaro.com/en/abs/structured-speaker-deficiency-adaptation-of-foundation-models-for-dysarthric-and-elderly-speech-recognition-2412.18832</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/structured-speaker-deficiency-adaptation-of-foundation-models-for-dysarthric-and-elderly-speech-recognition-2412.18832"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/structured-speaker-deficiency-adaptation-of-foundation-models-for-dysarthric-and-elderly-speech-recognition-2412.18832"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-audiovisual-speech-recognition-through-bifocal-preference-optimization-2412.19005</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-audiovisual-speech-recognition-through-bifocal-preference-optimization-2412.19005"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-audiovisual-speech-recognition-through-bifocal-preference-optimization-2412.19005"/></url>
<url><loc>https://scifaro.com/en/abs/attacking-voice-anonymization-systems-with-augmented-feature-and-speaker-identity-difference-2412.19068</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/attacking-voice-anonymization-systems-with-augmented-feature-and-speaker-identity-difference-2412.19068"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/attacking-voice-anonymization-systems-with-augmented-feature-and-speaker-identity-difference-2412.19068"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speech-and-natural-language-processing-models-for-depression-screening-2412.19072</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speech-and-natural-language-processing-models-for-depression-screening-2412.19072"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speech-and-natural-language-processing-models-for-depression-screening-2412.19072"/></url>
<url><loc>https://scifaro.com/en/abs/graph-enhanced-dual-stream-feature-fusion-with-pre-trained-model-for-acoustic-traffic-monitoring-2412.19078</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/graph-enhanced-dual-stream-feature-fusion-with-pre-trained-model-for-acoustic-traffic-monitoring-2412.19078"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/graph-enhanced-dual-stream-feature-fusion-with-pre-trained-model-for-acoustic-traffic-monitoring-2412.19078"/></url>
<url><loc>https://scifaro.com/en/abs/causal-speech-enhancement-with-predicting-semantics-based-on-quantized-self-supervised-learning-features-2412.19248</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-speech-enhancement-with-predicting-semantics-based-on-quantized-self-supervised-learning-features-2412.19248"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-speech-enhancement-with-predicting-semantics-based-on-quantized-self-supervised-learning-features-2412.19248"/></url>
<url><loc>https://scifaro.com/en/abs/voicedit-dual-condition-diffusion-transformer-for-environment-aware-speech-synthesis-2412.19259</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicedit-dual-condition-diffusion-transformer-for-environment-aware-speech-synthesis-2412.19259"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicedit-dual-condition-diffusion-transformer-for-environment-aware-speech-synthesis-2412.19259"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-single-asr-model-that-generalizes-to-disordered-speech-2412.19315</loc><lastmod>2025-12-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-single-asr-model-that-generalizes-to-disordered-speech-2412.19315"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-single-asr-model-that-generalizes-to-disordered-speech-2412.19315"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-based-delayless-subband-adaptive-filter-using-complex-self-attention-for-active-noise-control-2412.19471</loc><lastmod>2024-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-based-delayless-subband-adaptive-filter-using-complex-self-attention-for-active-noise-control-2412.19471"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-based-delayless-subband-adaptive-filter-using-complex-self-attention-for-active-noise-control-2412.19471"/></url>
<url><loc>https://scifaro.com/en/abs/crossspeech-cross-lingual-speech-synthesis-with-decoupled-language-and-speaker-generation-2412.20048</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/crossspeech-cross-lingual-speech-synthesis-with-decoupled-language-and-speaker-generation-2412.20048"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/crossspeech-cross-lingual-speech-synthesis-with-decoupled-language-and-speaker-generation-2412.20048"/></url>
<url><loc>https://scifaro.com/en/abs/distance-based-single-channel-target-speech-extraction-2412.20144</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distance-based-single-channel-target-speech-extraction-2412.20144"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distance-based-single-channel-target-speech-extraction-2412.20144"/></url>
<url><loc>https://scifaro.com/en/abs/bird-vocalization-embedding-extraction-using-self-supervised-disentangled-representation-learning-2412.20146</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bird-vocalization-embedding-extraction-using-self-supervised-disentangled-representation-learning-2412.20146"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bird-vocalization-embedding-extraction-using-self-supervised-disentangled-representation-learning-2412.20146"/></url>
<url><loc>https://scifaro.com/en/abs/emoreg-directional-latent-vector-modeling-for-emotional-intensity-regularization-in-diffusion-based-voice-conversion-2412.20359</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emoreg-directional-latent-vector-modeling-for-emotional-intensity-regularization-in-diffusion-based-voice-conversion-2412.20359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emoreg-directional-latent-vector-modeling-for-emotional-intensity-regularization-in-diffusion-based-voice-conversion-2412.20359"/></url>
<url><loc>https://scifaro.com/en/abs/metadata-enhanced-speech-emotion-recognition-augmented-residual-integration-and-co-attention-in-two-stage-fine-tuning-2412.20707</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/metadata-enhanced-speech-emotion-recognition-augmented-residual-integration-and-co-attention-in-two-stage-fine-tuning-2412.20707"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/metadata-enhanced-speech-emotion-recognition-augmented-residual-integration-and-co-attention-in-two-stage-fine-tuning-2412.20707"/></url>
<url><loc>https://scifaro.com/en/abs/improving-acoustic-scene-classification-in-low-resource-conditions-2412.20722</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-acoustic-scene-classification-in-low-resource-conditions-2412.20722"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-acoustic-scene-classification-in-low-resource-conditions-2412.20722"/></url>
<url><loc>https://scifaro.com/en/abs/phoneme-level-contrastive-learning-for-user-defined-keyword-spotting-with-flexible-enrollment-2412.20805</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/phoneme-level-contrastive-learning-for-user-defined-keyword-spotting-with-flexible-enrollment-2412.20805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/phoneme-level-contrastive-learning-for-user-defined-keyword-spotting-with-flexible-enrollment-2412.20805"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-multimodal-emotion-recognition-through-multi-granularity-cross-modal-alignment-2412.20821</loc><lastmod>2024-12-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-multimodal-emotion-recognition-through-multi-granularity-cross-modal-alignment-2412.20821"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-multimodal-emotion-recognition-through-multi-granularity-cross-modal-alignment-2412.20821"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-with-llms-adapted-to-disordered-speech-using-reinforcement-learning-2501.00039</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-with-llms-adapted-to-disordered-speech-using-reinforcement-learning-2501.00039"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-with-llms-adapted-to-disordered-speech-using-reinforcement-learning-2501.00039"/></url>
<url><loc>https://scifaro.com/en/abs/dicow-diarization-conditioned-whisper-for-target-speaker-automatic-speech-recognition-2501.00114</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dicow-diarization-conditioned-whisper-for-target-speaker-automatic-speech-recognition-2501.00114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dicow-diarization-conditioned-whisper-for-target-speaker-automatic-speech-recognition-2501.00114"/></url>
<url><loc>https://scifaro.com/en/abs/tackling-cognitive-impairment-detection-from-speech-a-submission-to-the-process-challenge-2501.00145</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tackling-cognitive-impairment-detection-from-speech-a-submission-to-the-process-challenge-2501.00145"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tackling-cognitive-impairment-detection-from-speech-a-submission-to-the-process-challenge-2501.00145"/></url>
<url><loc>https://scifaro.com/en/abs/voicerestore-flow-matching-transformers-for-speech-recording-quality-restoration-2501.00794</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicerestore-flow-matching-transformers-for-speech-recording-quality-restoration-2501.00794"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicerestore-flow-matching-transformers-for-speech-recording-quality-restoration-2501.00794"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-text-pronunciation-correlation-generation-and-application-for-contextual-biasing-2501.00804</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-text-pronunciation-correlation-generation-and-application-for-contextual-biasing-2501.00804"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-text-pronunciation-correlation-generation-and-application-for-contextual-biasing-2501.00804"/></url>
<url><loc>https://scifaro.com/en/abs/slide-integrating-speech-language-model-with-llm-for-spontaneous-spoken-dialogue-generation-2501.00805</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/slide-integrating-speech-language-model-with-llm-for-spontaneous-spoken-dialogue-generation-2501.00805"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/slide-integrating-speech-language-model-with-llm-for-spontaneous-spoken-dialogue-generation-2501.00805"/></url>
<url><loc>https://scifaro.com/en/abs/disambiguation-of-chinese-polyphones-in-an-end-to-end-framework-with-semantic-features-extracted-by-pre-trained-bert-2501.01102</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/disambiguation-of-chinese-polyphones-in-an-end-to-end-framework-with-semantic-features-extracted-by-pre-trained-bert-2501.01102"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/disambiguation-of-chinese-polyphones-in-an-end-to-end-framework-with-semantic-features-extracted-by-pre-trained-bert-2501.01102"/></url>
<url><loc>https://scifaro.com/en/abs/learning-discriminative-features-from-spectrograms-using-center-loss-for-speech-emotion-recognition-2501.01103</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-discriminative-features-from-spectrograms-using-center-loss-for-speech-emotion-recognition-2501.01103"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-discriminative-features-from-spectrograms-using-center-loss-for-speech-emotion-recognition-2501.01103"/></url>
<url><loc>https://scifaro.com/en/abs/sensitivity-of-room-impulse-responses-in-changing-acoustic-environment-2501.01206</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sensitivity-of-room-impulse-responses-in-changing-acoustic-environment-2501.01206"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sensitivity-of-room-impulse-responses-in-changing-acoustic-environment-2501.01206"/></url>
<url><loc>https://scifaro.com/en/abs/voicevector-multimodal-enrolment-vectors-for-speaker-separation-2501.01401</loc><lastmod>2025-01-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voicevector-multimodal-enrolment-vectors-for-speaker-separation-2501.01401"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voicevector-multimodal-enrolment-vectors-for-speaker-separation-2501.01401"/></url>
<url><loc>https://scifaro.com/en/abs/reading-to-listen-at-the-cocktail-party-multi-modal-speech-separation-2501.01518</loc><lastmod>2025-01-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reading-to-listen-at-the-cocktail-party-multi-modal-speech-separation-2501.01518"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reading-to-listen-at-the-cocktail-party-multi-modal-speech-separation-2501.01518"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-audio-compression-through-entropy-controlled-dithering-2501.02293</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-audio-compression-through-entropy-controlled-dithering-2501.02293"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-audio-compression-through-entropy-controlled-dithering-2501.02293"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-long-speech-sequence-modelling-for-time-domain-depression-level-estimation-2501.02512</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-long-speech-sequence-modelling-for-time-domain-depression-level-estimation-2501.02512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-long-speech-sequence-modelling-for-time-domain-depression-level-estimation-2501.02512"/></url>
<url><loc>https://scifaro.com/en/abs/a-frequency-aware-augmentation-network-for-mental-disorders-assessment-from-audio-2501.02516</loc><lastmod>2025-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-frequency-aware-augmentation-network-for-mental-disorders-assessment-from-audio-2501.02516"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-frequency-aware-augmentation-network-for-mental-disorders-assessment-from-audio-2501.02516"/></url>
<url><loc>https://scifaro.com/en/abs/single-channel-distance-based-source-separation-for-mobile-gpu-in-outdoor-and-indoor-environments-2501.03045</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/single-channel-distance-based-source-separation-for-mobile-gpu-in-outdoor-and-indoor-environments-2501.03045"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/single-channel-distance-based-source-separation-for-mobile-gpu-in-outdoor-and-indoor-environments-2501.03045"/></url>
<url><loc>https://scifaro.com/en/abs/noise-robust-target-speaker-voice-activity-detection-through-self-supervised-pretraining-2501.03184</loc><lastmod>2025-01-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-robust-target-speaker-voice-activity-detection-through-self-supervised-pretraining-2501.03184"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-robust-target-speaker-voice-activity-detection-through-self-supervised-pretraining-2501.03184"/></url>
<url><loc>https://scifaro.com/en/abs/breaking-through-the-spike-spike-window-decoding-for-accelerated-and-precise-automatic-speech-recognition-2501.03257</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/breaking-through-the-spike-spike-window-decoding-for-accelerated-and-precise-automatic-speech-recognition-2501.03257"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/breaking-through-the-spike-spike-window-decoding-for-accelerated-and-precise-automatic-speech-recognition-2501.03257"/></url>
<url><loc>https://scifaro.com/en/abs/overview-of-automatic-speech-analysis-and-technologies-for-neurodegenerative-disorders-diagnosis-and-assistive-applications-2501.03536</loc><lastmod>2025-08-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/overview-of-automatic-speech-analysis-and-technologies-for-neurodegenerative-disorders-diagnosis-and-assistive-applications-2501.03536"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/overview-of-automatic-speech-analysis-and-technologies-for-neurodegenerative-disorders-diagnosis-and-assistive-applications-2501.03536"/></url>
<url><loc>https://scifaro.com/en/abs/towards-a-generalizable-speech-marker-for-parkinson-s-disease-diagnosis-2501.03581</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-a-generalizable-speech-marker-for-parkinson-s-disease-diagnosis-2501.03581"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-a-generalizable-speech-marker-for-parkinson-s-disease-diagnosis-2501.03581"/></url>
<url><loc>https://scifaro.com/en/abs/universal-speaker-embedding-free-target-speaker-extraction-and-personal-voice-activity-detection-2501.03612</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/universal-speaker-embedding-free-target-speaker-extraction-and-personal-voice-activity-detection-2501.03612"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/universal-speaker-embedding-free-target-speaker-extraction-and-personal-voice-activity-detection-2501.03612"/></url>
<url><loc>https://scifaro.com/en/abs/detecting-neurocognitive-disorders-through-analyses-of-topic-evolution-and-cross-modal-consistency-in-visual-stimulated-narratives-2501.03727</loc><lastmod>2025-10-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/detecting-neurocognitive-disorders-through-analyses-of-topic-evolution-and-cross-modal-consistency-in-visual-stimulated-narratives-2501.03727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/detecting-neurocognitive-disorders-through-analyses-of-topic-evolution-and-cross-modal-consistency-in-visual-stimulated-narratives-2501.03727"/></url>
<url><loc>https://scifaro.com/en/abs/pseudo-strong-labels-from-frame-level-predictions-for-weakly-supervised-sound-event-detection-2501.03740</loc><lastmod>2025-01-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseudo-strong-labels-from-frame-level-predictions-for-weakly-supervised-sound-event-detection-2501.03740"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseudo-strong-labels-from-frame-level-predictions-for-weakly-supervised-sound-event-detection-2501.03740"/></url>
<url><loc>https://scifaro.com/en/abs/spectral-aware-low-rank-adaptation-for-speaker-verification-2501.03829</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spectral-aware-low-rank-adaptation-for-speaker-verification-2501.03829"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spectral-aware-low-rank-adaptation-for-speaker-verification-2501.03829"/></url>
<url><loc>https://scifaro.com/en/abs/dconnear-an-artifact-free-neural-network-architecture-for-closed-loop-audio-signal-processing-2501.04116</loc><lastmod>2025-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dconnear-an-artifact-free-neural-network-architecture-for-closed-loop-audio-signal-processing-2501.04116"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dconnear-an-artifact-free-neural-network-architecture-for-closed-loop-audio-signal-processing-2501.04116"/></url>
<url><loc>https://scifaro.com/en/abs/decoding-eeg-speech-perception-with-transformers-and-vae-based-data-augmentation-2501.04359</loc><lastmod>2025-12-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/decoding-eeg-speech-perception-with-transformers-and-vae-based-data-augmentation-2501.04359"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/decoding-eeg-speech-perception-with-transformers-and-vae-based-data-augmentation-2501.04359"/></url>
<url><loc>https://scifaro.com/en/abs/zsvc-zero-shot-style-voice-conversion-with-disentangled-latent-diffusion-models-and-adversarial-training-2501.04416</loc><lastmod>2025-01-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/zsvc-zero-shot-style-voice-conversion-with-disentangled-latent-diffusion-models-and-adversarial-training-2501.04416"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/zsvc-zero-shot-style-voice-conversion-with-disentangled-latent-diffusion-models-and-adversarial-training-2501.04416"/></url>
<url><loc>https://scifaro.com/en/abs/flespeech-flexibly-controllable-speech-generation-with-various-prompts-2501.04644</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flespeech-flexibly-controllable-speech-generation-with-various-prompts-2501.04644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flespeech-flexibly-controllable-speech-generation-with-various-prompts-2501.04644"/></url>
<url><loc>https://scifaro.com/en/abs/meta-learning-based-percussion-transcription-and-t-bar-a-la-identification-from-low-resource-audio-2501.04742</loc><lastmod>2025-06-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/meta-learning-based-percussion-transcription-and-t-bar-a-la-identification-from-low-resource-audio-2501.04742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/meta-learning-based-percussion-transcription-and-t-bar-a-la-identification-from-low-resource-audio-2501.04742"/></url>
<url><loc>https://scifaro.com/en/abs/comparison-of-fundamental-frequency-estimators-with-subharmonic-voice-signals-2501.04789</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comparison-of-fundamental-frequency-estimators-with-subharmonic-voice-signals-2501.04789"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comparison-of-fundamental-frequency-estimators-with-subharmonic-voice-signals-2501.04789"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-listened-speech-decoding-from-eeg-via-parallel-phoneme-sequence-prediction-2501.04844</loc><lastmod>2025-01-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-listened-speech-decoding-from-eeg-via-parallel-phoneme-sequence-prediction-2501.04844"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-listened-speech-decoding-from-eeg-via-parallel-phoneme-sequence-prediction-2501.04844"/></url>
<url><loc>https://scifaro.com/en/abs/flowhigh-towards-efficient-and-high-quality-audio-super-resolution-with-single-step-flow-matching-2501.04926</loc><lastmod>2025-03-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flowhigh-towards-efficient-and-high-quality-audio-super-resolution-with-single-step-flow-matching-2501.04926"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flowhigh-towards-efficient-and-high-quality-audio-super-resolution-with-single-step-flow-matching-2501.04926"/></url>
<url><loc>https://scifaro.com/en/abs/a-large-scale-probing-analysis-of-speaker-specific-attributes-in-self-supervised-speech-representations-2501.05310</loc><lastmod>2026-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-large-scale-probing-analysis-of-speaker-specific-attributes-in-self-supervised-speech-representations-2501.05310"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-large-scale-probing-analysis-of-speaker-specific-attributes-in-self-supervised-speech-representations-2501.05310"/></url>
<url><loc>https://scifaro.com/en/abs/mel-spectrogram-inversion-via-alternating-direction-method-of-multipliers-2501.05557</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mel-spectrogram-inversion-via-alternating-direction-method-of-multipliers-2501.05557"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mel-spectrogram-inversion-via-alternating-direction-method-of-multipliers-2501.05557"/></url>
<url><loc>https://scifaro.com/en/abs/sub-band-domain-multi-hypothesis-acoustic-echo-canceler-based-acoustic-scene-analysis-2501.05652</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sub-band-domain-multi-hypothesis-acoustic-echo-canceler-based-acoustic-scene-analysis-2501.05652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sub-band-domain-multi-hypothesis-acoustic-echo-canceler-based-acoustic-scene-analysis-2501.05652"/></url>
<url><loc>https://scifaro.com/en/abs/mars6-a-small-and-robust-hierarchical-codec-text-to-speech-model-2501.05787</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mars6-a-small-and-robust-hierarchical-codec-text-to-speech-model-2501.05787"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mars6-a-small-and-robust-hierarchical-codec-text-to-speech-model-2501.05787"/></url>
<url><loc>https://scifaro.com/en/abs/large-model-empowered-streaming-speech-semantic-communications-2501.05859</loc><lastmod>2025-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/large-model-empowered-streaming-speech-semantic-communications-2501.05859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/large-model-empowered-streaming-speech-semantic-communications-2501.05859"/></url>
<url><loc>https://scifaro.com/en/abs/estimation-and-restoration-of-unknown-nonlinear-distortion-using-diffusion-2501.05959</loc><lastmod>2025-01-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/estimation-and-restoration-of-unknown-nonlinear-distortion-using-diffusion-2501.05959"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/estimation-and-restoration-of-unknown-nonlinear-distortion-using-diffusion-2501.05959"/></url>
<url><loc>https://scifaro.com/en/abs/low-resource-text-to-speech-synthesis-using-noise-augmented-training-of-forwardtacotron-2501.05976</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-resource-text-to-speech-synthesis-using-noise-augmented-training-of-forwardtacotron-2501.05976"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-resource-text-to-speech-synthesis-using-noise-augmented-training-of-forwardtacotron-2501.05976"/></url>
<url><loc>https://scifaro.com/en/abs/tts-transducer-end-to-end-speech-synthesis-with-neural-transducer-2501.06320</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tts-transducer-end-to-end-speech-synthesis-with-neural-transducer-2501.06320"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tts-transducer-end-to-end-speech-synthesis-with-neural-transducer-2501.06320"/></url>
<url><loc>https://scifaro.com/en/abs/the-1st-speechwellness-challenge-detecting-suicide-risk-among-adolescents-2501.06474</loc><lastmod>2025-09-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-1st-speechwellness-challenge-detecting-suicide-risk-among-adolescents-2501.06474"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-1st-speechwellness-challenge-detecting-suicide-risk-among-adolescents-2501.06474"/></url>
<url><loc>https://scifaro.com/en/abs/speech-recognition-for-automatically-assessing-afrikaans-and-isixhosa-preschool-oral-narratives-2501.06478</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-recognition-for-automatically-assessing-afrikaans-and-isixhosa-preschool-oral-narratives-2501.06478"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-recognition-for-automatically-assessing-afrikaans-and-isixhosa-preschool-oral-narratives-2501.06478"/></url>
<url><loc>https://scifaro.com/en/abs/multi-modal-speech-enhancement-with-limited-electromyography-channels-2501.06530</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-modal-speech-enhancement-with-limited-electromyography-channels-2501.06530"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-modal-speech-enhancement-with-limited-electromyography-channels-2501.06530"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-speech-unit-extraction-via-independent-component-analysis-2501.06562</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-speech-unit-extraction-via-independent-component-analysis-2501.06562"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-speech-unit-extraction-via-independent-component-analysis-2501.06562"/></url>
<url><loc>https://scifaro.com/en/abs/integrating-pause-information-with-word-embeddings-in-language-models-for-alzheimer-s-disease-detection-from-spontaneous-speech-2501.06727</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/integrating-pause-information-with-word-embeddings-in-language-models-for-alzheimer-s-disease-detection-from-spontaneous-speech-2501.06727"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/integrating-pause-information-with-word-embeddings-in-language-models-for-alzheimer-s-disease-detection-from-spontaneous-speech-2501.06727"/></url>
<url><loc>https://scifaro.com/en/abs/improving-cross-lingual-phonetic-representation-of-low-resource-languages-through-language-similarity-analysis-2501.06810</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-cross-lingual-phonetic-representation-of-low-resource-languages-through-language-similarity-analysis-2501.06810"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-cross-lingual-phonetic-representation-of-low-resource-languages-through-language-similarity-analysis-2501.06810"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-array-signal-processing-and-deep-learning-for-speech-enhancement-2501.07215</loc><lastmod>2025-01-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-array-signal-processing-and-deep-learning-for-speech-enhancement-2501.07215"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-array-signal-processing-and-deep-learning-for-speech-enhancement-2501.07215"/></url>
<url><loc>https://scifaro.com/en/abs/completing-sets-of-prototype-transfer-functions-for-subspace-based-direction-of-arrival-estimation-of-multiple-speakers-2501.07524</loc><lastmod>2026-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/completing-sets-of-prototype-transfer-functions-for-subspace-based-direction-of-arrival-estimation-of-multiple-speakers-2501.07524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/completing-sets-of-prototype-transfer-functions-for-subspace-based-direction-of-arrival-estimation-of-multiple-speakers-2501.07524"/></url>
<url><loc>https://scifaro.com/en/abs/gen-a-generalizing-ambisonics-neural-encoding-to-unseen-microphone-arrays-2501.08047</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gen-a-generalizing-ambisonics-neural-encoding-to-unseen-microphone-arrays-2501.08047"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gen-a-generalizing-ambisonics-neural-encoding-to-unseen-microphone-arrays-2501.08047"/></url>
<url><loc>https://scifaro.com/en/abs/optimizing-speech-multi-view-feature-fusion-through-conditional-computation-2501.08057</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/optimizing-speech-multi-view-feature-fusion-through-conditional-computation-2501.08057"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/optimizing-speech-multi-view-feature-fusion-through-conditional-computation-2501.08057"/></url>
<url><loc>https://scifaro.com/en/abs/loudspeaker-beamforming-to-enhance-speech-recognition-performance-of-voice-driven-applications-2501.08104</loc><lastmod>2025-05-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/loudspeaker-beamforming-to-enhance-speech-recognition-performance-of-voice-driven-applications-2501.08104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/loudspeaker-beamforming-to-enhance-speech-recognition-performance-of-voice-driven-applications-2501.08104"/></url>
<url><loc>https://scifaro.com/en/abs/neural-speech-tracking-in-a-virtual-acoustic-environment-audio-visual-benefit-for-unscripted-continuous-speech-2501.08124</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-speech-tracking-in-a-virtual-acoustic-environment-audio-visual-benefit-for-unscripted-continuous-speech-2501.08124"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-speech-tracking-in-a-virtual-acoustic-environment-audio-visual-benefit-for-unscripted-continuous-speech-2501.08124"/></url>
<url><loc>https://scifaro.com/en/abs/automatic-live-music-song-identification-using-multi-level-deep-sequence-similarity-learning-2501.08129</loc><lastmod>2025-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/automatic-live-music-song-identification-using-multi-level-deep-sequence-similarity-learning-2501.08129"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/automatic-live-music-song-identification-using-multi-level-deep-sequence-similarity-learning-2501.08129"/></url>
<url><loc>https://scifaro.com/en/abs/seal-speaker-error-correction-using-acoustic-conditioned-large-language-models-2501.08421</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seal-speaker-error-correction-using-acoustic-conditioned-large-language-models-2501.08421"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seal-speaker-error-correction-using-acoustic-conditioned-large-language-models-2501.08421"/></url>
<url><loc>https://scifaro.com/en/abs/iitkgp-absp-submission-to-lre22-language-recognition-in-low-resource-settings-2501.08616</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/iitkgp-absp-submission-to-lre22-language-recognition-in-low-resource-settings-2501.08616"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/iitkgp-absp-submission-to-lre22-language-recognition-in-low-resource-settings-2501.08616"/></url>
<url><loc>https://scifaro.com/en/abs/speech-synthesis-along-perceptual-voice-quality-dimensions-2501.08791</loc><lastmod>2025-01-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-synthesis-along-perceptual-voice-quality-dimensions-2501.08791"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-synthesis-along-perceptual-voice-quality-dimensions-2501.08791"/></url>
<url><loc>https://scifaro.com/en/abs/persoda-personalized-data-augmentation-for-personalized-asr-2501.09113</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/persoda-personalized-data-augmentation-for-personalized-asr-2501.09113"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/persoda-personalized-data-augmentation-for-personalized-asr-2501.09113"/></url>
<url><loc>https://scifaro.com/en/abs/towards-detecting-the-pathological-subharmonic-voicing-with-fully-convolutional-neural-networks-2501.09159</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-detecting-the-pathological-subharmonic-voicing-with-fully-convolutional-neural-networks-2501.09159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-detecting-the-pathological-subharmonic-voicing-with-fully-convolutional-neural-networks-2501.09159"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-speaker-identity-text-guided-target-speech-extraction-2501.09169</loc><lastmod>2025-01-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-speaker-identity-text-guided-target-speech-extraction-2501.09169"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-speaker-identity-text-guided-target-speech-extraction-2501.09169"/></url>
<url><loc>https://scifaro.com/en/abs/quantum-enhanced-transformers-for-robust-acoustic-scene-classification-in-iot-environments-2501.09394</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantum-enhanced-transformers-for-robust-acoustic-scene-classification-in-iot-environments-2501.09394"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantum-enhanced-transformers-for-robust-acoustic-scene-classification-in-iot-environments-2501.09394"/></url>
<url><loc>https://scifaro.com/en/abs/clap-s-support-set-based-adaptation-for-downstream-fiber-optic-acoustic-recognition-2501.09877</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/clap-s-support-set-based-adaptation-for-downstream-fiber-optic-acoustic-recognition-2501.09877"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/clap-s-support-set-based-adaptation-for-downstream-fiber-optic-acoustic-recognition-2501.09877"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-rhythm-and-voice-conversion-of-dysarthric-to-healthy-speech-for-asr-2501.10256</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-rhythm-and-voice-conversion-of-dysarthric-to-healthy-speech-for-asr-2501.10256"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-rhythm-and-voice-conversion-of-dysarthric-to-healthy-speech-for-asr-2501.10256"/></url>
<url><loc>https://scifaro.com/en/abs/on-ambisonic-source-separation-with-spatially-informed-non-negative-tensor-factorization-2501.10305</loc><lastmod>2025-01-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-ambisonic-source-separation-with-spatially-informed-non-negative-tensor-factorization-2501.10305"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-ambisonic-source-separation-with-spatially-informed-non-negative-tensor-factorization-2501.10305"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-cross-attention-transformer-and-multi-feature-fusion-for-cross-linguistic-speech-emotion-recognition-2501.10408</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-cross-attention-transformer-and-multi-feature-fusion-for-cross-linguistic-speech-emotion-recognition-2501.10408"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-cross-attention-transformer-and-multi-feature-fusion-for-cross-linguistic-speech-emotion-recognition-2501.10408"/></url>
<url><loc>https://scifaro.com/en/abs/gec-rag-improving-generative-error-correction-via-retrieval-augmented-generation-for-automatic-speech-recognition-systems-2501.10734</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gec-rag-improving-generative-error-correction-via-retrieval-augmented-generation-for-automatic-speech-recognition-systems-2501.10734"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gec-rag-improving-generative-error-correction-via-retrieval-augmented-generation-for-automatic-speech-recognition-systems-2501.10734"/></url>
<url><loc>https://scifaro.com/en/abs/flashsr-one-step-versatile-audio-super-resolution-via-diffusion-distillation-2501.10807</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flashsr-one-step-versatile-audio-super-resolution-via-diffusion-distillation-2501.10807"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flashsr-one-step-versatile-audio-super-resolution-via-diffusion-distillation-2501.10807"/></url>
<url><loc>https://scifaro.com/en/abs/sef-pnet-speaker-encoder-free-personalized-speech-enhancement-with-local-and-global-contexts-aggregation-2501.11274</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sef-pnet-speaker-encoder-free-personalized-speech-enhancement-with-local-and-global-contexts-aggregation-2501.11274"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sef-pnet-speaker-encoder-free-personalized-speech-enhancement-with-local-and-global-contexts-aggregation-2501.11274"/></url>
<url><loc>https://scifaro.com/en/abs/llm-supervised-pre-training-for-multimodal-emotion-recognition-in-conversations-2501.11468</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llm-supervised-pre-training-for-multimodal-emotion-recognition-in-conversations-2501.11468"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llm-supervised-pre-training-for-multimodal-emotion-recognition-in-conversations-2501.11468"/></url>
<url><loc>https://scifaro.com/en/abs/30-years-of-source-separation-research-achievements-and-future-challenges-2501.11837</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/30-years-of-source-separation-research-achievements-and-future-challenges-2501.11837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/30-years-of-source-separation-research-achievements-and-future-challenges-2501.11837"/></url>
<url><loc>https://scifaro.com/en/abs/rate-aware-learned-speech-compression-2501.11999</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rate-aware-learned-speech-compression-2501.11999"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rate-aware-learned-speech-compression-2501.11999"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-with-overlapped-frame-information-fusion-and-causal-self-attention-2501.12004</loc><lastmod>2025-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-with-overlapped-frame-information-fusion-and-causal-self-attention-2501.12004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-with-overlapped-frame-information-fusion-and-causal-self-attention-2501.12004"/></url>
<url><loc>https://scifaro.com/en/abs/a-domain-adaptation-framework-for-speech-recognition-systems-with-only-synthetic-data-2501.12501</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-domain-adaptation-framework-for-speech-recognition-systems-with-only-synthetic-data-2501.12501"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-domain-adaptation-framework-for-speech-recognition-systems-with-only-synthetic-data-2501.12501"/></url>
<url><loc>https://scifaro.com/en/abs/emotech-a-multi-modal-speech-emotion-recognition-using-multi-source-low-level-information-with-hybrid-recurrent-network-2501.12674</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emotech-a-multi-modal-speech-emotion-recognition-using-multi-source-low-level-information-with-hybrid-recurrent-network-2501.12674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emotech-a-multi-modal-speech-emotion-recognition-using-multi-source-low-level-information-with-hybrid-recurrent-network-2501.12674"/></url>
<url><loc>https://scifaro.com/en/abs/emoformer-a-text-independent-speech-emotion-recognition-using-a-hybrid-transformer-cnn-model-2501.12682</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emoformer-a-text-independent-speech-emotion-recognition-using-a-hybrid-transformer-cnn-model-2501.12682"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emoformer-a-text-independent-speech-emotion-recognition-using-a-hybrid-transformer-cnn-model-2501.12682"/></url>
<url><loc>https://scifaro.com/en/abs/soundspring-loss-resilient-audio-transceiver-with-dual-functional-masked-language-modeling-2501.12696</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/soundspring-loss-resilient-audio-transceiver-with-dual-functional-masked-language-modeling-2501.12696"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/soundspring-loss-resilient-audio-transceiver-with-dual-functional-masked-language-modeling-2501.12696"/></url>
<url><loc>https://scifaro.com/en/abs/why-disentanglement-based-speaker-anonymization-systems-fail-at-preserving-emotions-2501.13000</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-disentanglement-based-speaker-anonymization-systems-fail-at-preserving-emotions-2501.13000"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-disentanglement-based-speaker-anonymization-systems-fail-at-preserving-emotions-2501.13000"/></url>
<url><loc>https://scifaro.com/en/abs/retrieval-augmented-neural-field-for-hrtf-upsampling-and-personalization-2501.13017</loc><lastmod>2025-01-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/retrieval-augmented-neural-field-for-hrtf-upsampling-and-personalization-2501.13017"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/retrieval-augmented-neural-field-for-hrtf-upsampling-and-personalization-2501.13017"/></url>
<url><loc>https://scifaro.com/en/abs/generative-data-augmentation-challenge-synthesis-of-room-acoustics-for-speaker-distance-estimation-2501.13250</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-data-augmentation-challenge-synthesis-of-room-acoustics-for-speaker-distance-estimation-2501.13250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-data-augmentation-challenge-synthesis-of-room-acoustics-for-speaker-distance-estimation-2501.13250"/></url>
<url><loc>https://scifaro.com/en/abs/generative-data-augmentation-challenge-zero-shot-speech-synthesis-for-personalized-speech-enhancement-2501.13372</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generative-data-augmentation-challenge-zero-shot-speech-synthesis-for-personalized-speech-enhancement-2501.13372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generative-data-augmentation-challenge-zero-shot-speech-synthesis-for-personalized-speech-enhancement-2501.13372"/></url>
<url><loc>https://scifaro.com/en/abs/learning-based-a-posteriori-speech-presence-probability-estimation-and-applications-2501.13642</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/learning-based-a-posteriori-speech-presence-probability-estimation-and-applications-2501.13642"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/learning-based-a-posteriori-speech-presence-probability-estimation-and-applications-2501.13642"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-finetuned-audio-llm-on-heart-murmur-features-2501.13884</loc><lastmod>2025-01-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-finetuned-audio-llm-on-heart-murmur-features-2501.13884"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-finetuned-audio-llm-on-heart-murmur-features-2501.13884"/></url>
<url><loc>https://scifaro.com/en/abs/generalizable-audio-deepfake-detection-via-latent-space-refinement-and-augmentation-2501.14240</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalizable-audio-deepfake-detection-via-latent-space-refinement-and-augmentation-2501.14240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalizable-audio-deepfake-detection-via-latent-space-refinement-and-augmentation-2501.14240"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-emotion-and-speaker-adaptation-in-llm-based-tts-via-characteristic-specific-partial-fine-tuning-2501.14273</loc><lastmod>2026-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-emotion-and-speaker-adaptation-in-llm-based-tts-via-characteristic-specific-partial-fine-tuning-2501.14273"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-emotion-and-speaker-adaptation-in-llm-based-tts-via-characteristic-specific-partial-fine-tuning-2501.14273"/></url>
<url><loc>https://scifaro.com/en/abs/fireredasr-open-source-industrial-grade-mandarin-speech-recognition-models-from-encoder-decoder-to-llm-integration-2501.14350</loc><lastmod>2025-01-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fireredasr-open-source-industrial-grade-mandarin-speech-recognition-models-from-encoder-decoder-to-llm-integration-2501.14350"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fireredasr-open-source-industrial-grade-mandarin-speech-recognition-models-from-encoder-decoder-to-llm-integration-2501.14350"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-intelligibility-for-generative-target-speech-extraction-via-joint-optimization-with-target-speaker-asr-2501.14477</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-intelligibility-for-generative-target-speech-extraction-via-joint-optimization-with-target-speaker-asr-2501.14477"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-intelligibility-for-generative-target-speech-extraction-via-joint-optimization-with-target-speaker-asr-2501.14477"/></url>
<url><loc>https://scifaro.com/en/abs/diffusion-based-text-to-music-generation-with-global-and-local-text-based-conditioning-2501.14680</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffusion-based-text-to-music-generation-with-global-and-local-text-based-conditioning-2501.14680"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffusion-based-text-to-music-generation-with-global-and-local-text-based-conditioning-2501.14680"/></url>
<url><loc>https://scifaro.com/en/abs/end-to-end-target-speaker-speech-recognition-using-context-aware-attention-mechanisms-for-challenging-enrollment-scenario-2501.15466</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/end-to-end-target-speaker-speech-recognition-using-context-aware-attention-mechanisms-for-challenging-enrollment-scenario-2501.15466"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/end-to-end-target-speaker-speech-recognition-using-context-aware-attention-mechanisms-for-challenging-enrollment-scenario-2501.15466"/></url>
<url><loc>https://scifaro.com/en/abs/variational-bayesian-adaptive-learning-of-deep-latent-variables-for-acoustic-knowledge-transfer-2501.15496</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-bayesian-adaptive-learning-of-deep-latent-variables-for-acoustic-knowledge-transfer-2501.15496"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-bayesian-adaptive-learning-of-deep-latent-variables-for-acoustic-knowledge-transfer-2501.15496"/></url>
<url><loc>https://scifaro.com/en/abs/noise-disturbance-and-lack-of-privacy-modeling-acoustic-dissatisfaction-in-open-plan-offices-2501.15744</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/noise-disturbance-and-lack-of-privacy-modeling-acoustic-dissatisfaction-in-open-plan-offices-2501.15744"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/noise-disturbance-and-lack-of-privacy-modeling-acoustic-dissatisfaction-in-open-plan-offices-2501.15744"/></url>
<url><loc>https://scifaro.com/en/abs/rift-entropy-optimised-fractional-wavelet-constellations-for-ideal-time-frequency-estimation-2501.15764</loc><lastmod>2026-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rift-entropy-optimised-fractional-wavelet-constellations-for-ideal-time-frequency-estimation-2501.15764"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rift-entropy-optimised-fractional-wavelet-constellations-for-ideal-time-frequency-estimation-2501.15764"/></url>
<url><loc>https://scifaro.com/en/abs/edsep-an-effective-diffusion-based-method-for-speech-source-separation-2501.15965</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/edsep-an-effective-diffusion-based-method-for-speech-source-separation-2501.15965"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/edsep-an-effective-diffusion-based-method-for-speech-source-separation-2501.15965"/></url>
<url><loc>https://scifaro.com/en/abs/separate-this-and-all-of-these-things-around-it-music-source-separation-via-hyperellipsoidal-queries-2501.16171</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/separate-this-and-all-of-these-things-around-it-music-source-separation-via-hyperellipsoidal-queries-2501.16171"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/separate-this-and-all-of-these-things-around-it-music-source-separation-via-hyperellipsoidal-queries-2501.16171"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-and-exploring-mild-cognitive-impairment-detection-with-w2v-bert-2-0-2501.16201</loc><lastmod>2025-01-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-and-exploring-mild-cognitive-impairment-detection-with-w2v-bert-2-0-2501.16201"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-and-exploring-mild-cognitive-impairment-detection-with-w2v-bert-2-0-2501.16201"/></url>
<url><loc>https://scifaro.com/en/abs/developing-enhanced-conversational-agents-for-social-virtual-worlds-2501.16341</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/developing-enhanced-conversational-agents-for-social-virtual-worlds-2501.16341"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/developing-enhanced-conversational-agents-for-social-virtual-worlds-2501.16341"/></url>
<url><loc>https://scifaro.com/en/abs/whispa-semantically-and-psychologically-aligned-whisper-with-self-supervised-contrastive-and-student-teacher-learning-2501.16344</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/whispa-semantically-and-psychologically-aligned-whisper-with-self-supervised-contrastive-and-student-teacher-learning-2501.16344"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/whispa-semantically-and-psychologically-aligned-whisper-with-self-supervised-contrastive-and-student-teacher-learning-2501.16344"/></url>
<url><loc>https://scifaro.com/en/abs/neural-kalman-filters-for-acoustic-echo-cancellation-2501.16367</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neural-kalman-filters-for-acoustic-echo-cancellation-2501.16367"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neural-kalman-filters-for-acoustic-echo-cancellation-2501.16367"/></url>
<url><loc>https://scifaro.com/en/abs/unipet-spk-a-unified-framework-for-parameter-efficient-tuning-of-pre-trained-speech-models-for-robust-speaker-verification-2501.16542</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unipet-spk-a-unified-framework-for-parameter-efficient-tuning-of-pre-trained-speech-models-for-robust-speaker-verification-2501.16542"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unipet-spk-a-unified-framework-for-parameter-efficient-tuning-of-pre-trained-speech-models-for-robust-speaker-verification-2501.16542"/></url>
<url><loc>https://scifaro.com/en/abs/scdiar-a-streaming-diarization-system-based-on-speaker-change-detection-and-speech-recognition-2501.16641</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scdiar-a-streaming-diarization-system-based-on-speaker-change-detection-and-speech-recognition-2501.16641"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scdiar-a-streaming-diarization-system-based-on-speaker-change-detection-and-speech-recognition-2501.16641"/></url>
<url><loc>https://scifaro.com/en/abs/cosyaudio-improving-audio-generation-with-confidence-scores-and-synthetic-captions-2501.16761</loc><lastmod>2025-01-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cosyaudio-improving-audio-generation-with-confidence-scores-and-synthetic-captions-2501.16761"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cosyaudio-improving-audio-generation-with-confidence-scores-and-synthetic-captions-2501.16761"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-frameworks-for-speaker-verification-via-bootstrapped-positive-sampling-2501.17772</loc><lastmod>2025-07-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-frameworks-for-speaker-verification-via-bootstrapped-positive-sampling-2501.17772"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-frameworks-for-speaker-verification-via-bootstrapped-positive-sampling-2501.17772"/></url>
<url><loc>https://scifaro.com/en/abs/language-modelling-for-speaker-diarization-in-telephonic-interviews-2501.17893</loc><lastmod>2025-01-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-modelling-for-speaker-diarization-in-telephonic-interviews-2501.17893"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-modelling-for-speaker-diarization-in-telephonic-interviews-2501.17893"/></url>
<url><loc>https://scifaro.com/en/abs/ambisonics-binaural-rendering-via-masked-magnitude-least-squares-2501.18224</loc><lastmod>2025-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ambisonics-binaural-rendering-via-masked-magnitude-least-squares-2501.18224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ambisonics-binaural-rendering-via-masked-magnitude-least-squares-2501.18224"/></url>
<url><loc>https://scifaro.com/en/abs/bsm-imagls-ild-informed-binaural-signal-matching-for-reproduction-with-head-mounted-microphone-arrays-2501.18227</loc><lastmod>2025-06-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bsm-imagls-ild-informed-binaural-signal-matching-for-reproduction-with-head-mounted-microphone-arrays-2501.18227"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bsm-imagls-ild-informed-binaural-signal-matching-for-reproduction-with-head-mounted-microphone-arrays-2501.18227"/></url>
<url><loc>https://scifaro.com/en/abs/ml-aris-multilayer-underwater-acoustic-reconfigurable-intelligent-surface-with-high-resolution-reflection-control-2501.18355</loc><lastmod>2026-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ml-aris-multilayer-underwater-acoustic-reconfigurable-intelligent-surface-with-high-resolution-reflection-control-2501.18355"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ml-aris-multilayer-underwater-acoustic-reconfigurable-intelligent-surface-with-high-resolution-reflection-control-2501.18355"/></url>
<url><loc>https://scifaro.com/en/abs/resampling-filter-design-for-multirate-neural-audio-effect-processing-2501.18470</loc><lastmod>2025-05-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/resampling-filter-design-for-multirate-neural-audio-effect-processing-2501.18470"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/resampling-filter-design-for-multirate-neural-audio-effect-processing-2501.18470"/></url>
<url><loc>https://scifaro.com/en/abs/language-bias-in-self-supervised-learning-for-automatic-speech-recognition-2501.19321</loc><lastmod>2025-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/language-bias-in-self-supervised-learning-for-automatic-speech-recognition-2501.19321"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/language-bias-in-self-supervised-learning-for-automatic-speech-recognition-2501.19321"/></url>
<url><loc>https://scifaro.com/en/abs/toward-noise-robust-whisper-keyword-spotting-on-headphones-with-in-earcup-microphone-and-curriculum-learning-2502.00295</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/toward-noise-robust-whisper-keyword-spotting-on-headphones-with-in-earcup-microphone-and-curriculum-learning-2502.00295"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/toward-noise-robust-whisper-keyword-spotting-on-headphones-with-in-earcup-microphone-and-curriculum-learning-2502.00295"/></url>
<url><loc>https://scifaro.com/en/abs/do-neonates-hear-what-we-measure-assessing-neonatal-ward-soundscapes-at-the-neonates-ears-2502.00565</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-neonates-hear-what-we-measure-assessing-neonatal-ward-soundscapes-at-the-neonates-ears-2502.00565"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-neonates-hear-what-we-measure-assessing-neonatal-ward-soundscapes-at-the-neonates-ears-2502.00565"/></url>
<url><loc>https://scifaro.com/en/abs/mwhisper-flamingo-for-multilingual-audio-visual-noise-robust-speech-recognition-2502.01547</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mwhisper-flamingo-for-multilingual-audio-visual-noise-robust-speech-recognition-2502.01547"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mwhisper-flamingo-for-multilingual-audio-visual-noise-robust-speech-recognition-2502.01547"/></url>
<url><loc>https://scifaro.com/en/abs/safeguarding-privacy-in-edge-speech-understanding-with-tiny-foundation-models-2502.01649</loc><lastmod>2025-12-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/safeguarding-privacy-in-edge-speech-understanding-with-tiny-foundation-models-2502.01649"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/safeguarding-privacy-in-edge-speech-understanding-with-tiny-foundation-models-2502.01649"/></url>
<url><loc>https://scifaro.com/en/abs/complexdec-a-domain-robust-high-fidelity-neural-audio-codec-with-complex-spectrum-modeling-2502.02019</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/complexdec-a-domain-robust-high-fidelity-neural-audio-codec-with-complex-spectrum-modeling-2502.02019"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/complexdec-a-domain-robust-high-fidelity-neural-audio-codec-with-complex-spectrum-modeling-2502.02019"/></url>
<url><loc>https://scifaro.com/en/abs/self-supervised-convolutional-audio-models-are-flexible-acoustic-feature-learners-a-domain-specificity-and-transfer-learning-study-2502.02366</loc><lastmod>2025-02-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/self-supervised-convolutional-audio-models-are-flexible-acoustic-feature-learners-a-domain-specificity-and-transfer-learning-study-2502.02366"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/self-supervised-convolutional-audio-models-are-flexible-acoustic-feature-learners-a-domain-specificity-and-transfer-learning-study-2502.02366"/></url>
<url><loc>https://scifaro.com/en/abs/seal-speech-embedding-alignment-learning-for-speech-large-language-model-with-retrieval-augmented-generation-2502.02603</loc><lastmod>2025-12-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/seal-speech-embedding-alignment-learning-for-speech-large-language-model-with-retrieval-augmented-generation-2502.02603"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/seal-speech-embedding-alignment-learning-for-speech-large-language-model-with-retrieval-augmented-generation-2502.02603"/></url>
<url><loc>https://scifaro.com/en/abs/gense-generative-speech-enhancement-via-language-models-using-hierarchical-modeling-2502.02942</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gense-generative-speech-enhancement-via-language-models-using-hierarchical-modeling-2502.02942"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gense-generative-speech-enhancement-via-language-models-using-hierarchical-modeling-2502.02942"/></url>
<url><loc>https://scifaro.com/en/abs/fine-grained-preference-optimization-improves-zero-shot-text-to-speech-2502.02950</loc><lastmod>2025-12-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fine-grained-preference-optimization-improves-zero-shot-text-to-speech-2502.02950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fine-grained-preference-optimization-improves-zero-shot-text-to-speech-2502.02950"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-broadcast-media-subtitle-transcripts-for-automatic-speech-recognition-and-subtitling-2502.03212</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-broadcast-media-subtitle-transcripts-for-automatic-speech-recognition-and-subtitling-2502.03212"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-broadcast-media-subtitle-transcripts-for-automatic-speech-recognition-and-subtitling-2502.03212"/></url>
<url><loc>https://scifaro.com/en/abs/should-audio-front-ends-be-adaptive-comparing-learnable-and-adaptive-front-ends-2502.03260</loc><lastmod>2025-02-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/should-audio-front-ends-be-adaptive-comparing-learnable-and-adaptive-front-ends-2502.03260"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/should-audio-front-ends-be-adaptive-comparing-learnable-and-adaptive-front-ends-2502.03260"/></url>
<url><loc>https://scifaro.com/en/abs/dementia-classification-from-spontaneous-speech-using-wrapper-based-feature-selection-2502.03484</loc><lastmod>2026-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dementia-classification-from-spontaneous-speech-using-wrapper-based-feature-selection-2502.03484"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dementia-classification-from-spontaneous-speech-using-wrapper-based-feature-selection-2502.03484"/></url>
<url><loc>https://scifaro.com/en/abs/comprehensive-layer-wise-analysis-of-ssl-models-for-audio-deepfake-detection-2502.03559</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/comprehensive-layer-wise-analysis-of-ssl-models-for-audio-deepfake-detection-2502.03559"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/comprehensive-layer-wise-analysis-of-ssl-models-for-audio-deepfake-detection-2502.03559"/></url>
<url><loc>https://scifaro.com/en/abs/ditar-diffusion-transformer-autoregressive-modeling-for-speech-generation-2502.03930</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditar-diffusion-transformer-autoregressive-modeling-for-speech-generation-2502.03930"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditar-diffusion-transformer-autoregressive-modeling-for-speech-generation-2502.03930"/></url>
<url><loc>https://scifaro.com/en/abs/towards-explainable-spoofed-speech-attribution-and-detection-a-probabilistic-approach-for-characterizing-speech-synthesizer-components-2502.04049</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-explainable-spoofed-speech-attribution-and-detection-a-probabilistic-approach-for-characterizing-speech-synthesizer-components-2502.04049"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-explainable-spoofed-speech-attribution-and-detection-a-probabilistic-approach-for-characterizing-speech-synthesizer-components-2502.04049"/></url>
<url><loc>https://scifaro.com/en/abs/llasa-scaling-train-time-and-inference-time-compute-for-llama-based-speech-synthesis-2502.04128</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llasa-scaling-train-time-and-inference-time-compute-for-llama-based-speech-synthesis-2502.04128"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llasa-scaling-train-time-and-inference-time-compute-for-llama-based-speech-synthesis-2502.04128"/></url>
<url><loc>https://scifaro.com/en/abs/genvc-self-supervised-zero-shot-voice-conversion-2502.04519</loc><lastmod>2025-08-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/genvc-self-supervised-zero-shot-voice-conversion-2502.04519"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/genvc-self-supervised-zero-shot-voice-conversion-2502.04519"/></url>
<url><loc>https://scifaro.com/en/abs/efficient-evaluation-of-quantization-effects-in-neural-codecs-2502.04770</loc><lastmod>2025-02-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/efficient-evaluation-of-quantization-effects-in-neural-codecs-2502.04770"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/efficient-evaluation-of-quantization-effects-in-neural-codecs-2502.04770"/></url>
<url><loc>https://scifaro.com/en/abs/distillation-and-pruning-for-scalable-self-supervised-representation-based-speech-quality-assessment-2502.05356</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/distillation-and-pruning-for-scalable-self-supervised-representation-based-speech-quality-assessment-2502.05356"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/distillation-and-pruning-for-scalable-self-supervised-representation-based-speech-quality-assessment-2502.05356"/></url>
<url><loc>https://scifaro.com/en/abs/unbiased-sliced-wasserstein-kernels-for-high-quality-audio-captioning-2502.05435</loc><lastmod>2026-02-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unbiased-sliced-wasserstein-kernels-for-high-quality-audio-captioning-2502.05435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unbiased-sliced-wasserstein-kernels-for-high-quality-audio-captioning-2502.05435"/></url>
<url><loc>https://scifaro.com/en/abs/shiftyspeech-a-large-scale-synthetic-speech-dataset-with-distribution-shifts-2502.05674</loc><lastmod>2025-05-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/shiftyspeech-a-large-scale-synthetic-speech-dataset-with-distribution-shifts-2502.05674"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/shiftyspeech-a-large-scale-synthetic-speech-dataset-with-distribution-shifts-2502.05674"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-lipreading-by-audio-visual-self-distillation-pretraining-and-speaker-adaptation-2502.05758</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-lipreading-by-audio-visual-self-distillation-pretraining-and-speaker-adaptation-2502.05758"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-lipreading-by-audio-visual-self-distillation-pretraining-and-speaker-adaptation-2502.05758"/></url>
<url><loc>https://scifaro.com/en/abs/non-invasive-electromyographic-speech-neuroprosthesis-a-geometric-perspective-2502.05762</loc><lastmod>2026-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/non-invasive-electromyographic-speech-neuroprosthesis-a-geometric-perspective-2502.05762"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/non-invasive-electromyographic-speech-neuroprosthesis-a-geometric-perspective-2502.05762"/></url>
<url><loc>https://scifaro.com/en/abs/audio-visual-representation-learning-via-knowledge-distillation-from-speech-foundation-models-2502.05766</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-visual-representation-learning-via-knowledge-distillation-from-speech-foundation-models-2502.05766"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-visual-representation-learning-via-knowledge-distillation-from-speech-foundation-models-2502.05766"/></url>
<url><loc>https://scifaro.com/en/abs/synergistic-effects-of-knowledge-distillation-and-structured-pruning-for-self-supervised-speech-models-2502.05837</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/synergistic-effects-of-knowledge-distillation-and-structured-pruning-for-self-supervised-speech-models-2502.05837"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/synergistic-effects-of-knowledge-distillation-and-structured-pruning-for-self-supervised-speech-models-2502.05837"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-use-of-performer-and-agent-attention-for-spoken-language-identification-2502.05841</loc><lastmod>2025-02-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-use-of-performer-and-agent-attention-for-spoken-language-identification-2502.05841"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-use-of-performer-and-agent-attention-for-spoken-language-identification-2502.05841"/></url>
<url><loc>https://scifaro.com/en/abs/recent-advances-in-discrete-speech-tokens-a-review-2502.06490</loc><lastmod>2025-12-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recent-advances-in-discrete-speech-tokens-a-review-2502.06490"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recent-advances-in-discrete-speech-tokens-a-review-2502.06490"/></url>
<url><loc>https://scifaro.com/en/abs/a-hybrid-model-for-weakly-supervised-speech-dereverberation-2502.06839</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-hybrid-model-for-weakly-supervised-speech-dereverberation-2502.06839"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-hybrid-model-for-weakly-supervised-speech-dereverberation-2502.06839"/></url>
<url><loc>https://scifaro.com/en/abs/vinp-variational-bayesian-inference-with-neural-speech-prior-for-joint-asr-effective-speech-dereverberation-and-blind-rir-identification-2502.07205</loc><lastmod>2025-09-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/vinp-variational-bayesian-inference-with-neural-speech-prior-for-joint-asr-effective-speech-dereverberation-and-blind-rir-identification-2502.07205"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/vinp-variational-bayesian-inference-with-neural-speech-prior-for-joint-asr-effective-speech-dereverberation-and-blind-rir-identification-2502.07205"/></url>
<url><loc>https://scifaro.com/en/abs/towards-understanding-of-frequency-dependence-on-sound-event-detection-2502.07208</loc><lastmod>2025-08-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-understanding-of-frequency-dependence-on-sound-event-detection-2502.07208"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-understanding-of-frequency-dependence-on-sound-event-detection-2502.07208"/></url>
<url><loc>https://scifaro.com/en/abs/towards-efficient-and-multifaceted-computer-assisted-pronunciation-training-leveraging-hierarchical-selective-state-space-model-and-decoupled-cross-entropy-loss-2502.07575</loc><lastmod>2025-02-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-efficient-and-multifaceted-computer-assisted-pronunciation-training-leveraging-hierarchical-selective-state-space-model-and-decoupled-cross-entropy-loss-2502.07575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-efficient-and-multifaceted-computer-assisted-pronunciation-training-leveraging-hierarchical-selective-state-space-model-and-decoupled-cross-entropy-loss-2502.07575"/></url>
<url><loc>https://scifaro.com/en/abs/renderbox-expressive-performance-rendering-with-text-control-2502.07711</loc><lastmod>2025-02-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/renderbox-expressive-performance-rendering-with-text-control-2502.07711"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/renderbox-expressive-performance-rendering-with-text-control-2502.07711"/></url>
<url><loc>https://scifaro.com/en/abs/sparse-wavefield-reconstruction-and-denoising-with-boostlets-2502.08230</loc><lastmod>2026-01-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sparse-wavefield-reconstruction-and-denoising-with-boostlets-2502.08230"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sparse-wavefield-reconstruction-and-denoising-with-boostlets-2502.08230"/></url>
<url><loc>https://scifaro.com/en/abs/causal-analysis-of-asr-errors-for-children-quantifying-the-impact-of-physiological-cognitive-and-extrinsic-factors-2502.08587</loc><lastmod>2025-02-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/causal-analysis-of-asr-errors-for-children-quantifying-the-impact-of-physiological-cognitive-and-extrinsic-factors-2502.08587"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/causal-analysis-of-asr-errors-for-children-quantifying-the-impact-of-physiological-cognitive-and-extrinsic-factors-2502.08587"/></url>
<url><loc>https://scifaro.com/en/abs/asvspoof-5-design-collection-and-validation-of-resources-for-spoofing-deepfake-and-adversarial-attack-detection-using-crowdsourced-speech-2502.08857</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/asvspoof-5-design-collection-and-validation-of-resources-for-spoofing-deepfake-and-adversarial-attack-detection-using-crowdsourced-speech-2502.08857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/asvspoof-5-design-collection-and-validation-of-resources-for-spoofing-deepfake-and-adversarial-attack-detection-using-crowdsourced-speech-2502.08857"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-cognitive-decline-a-multimodal-ai-approach-to-dementia-screening-from-speech-2502.08862</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-cognitive-decline-a-multimodal-ai-approach-to-dementia-screening-from-speech-2502.08862"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-cognitive-decline-a-multimodal-ai-approach-to-dementia-screening-from-speech-2502.08862"/></url>
<url><loc>https://scifaro.com/en/abs/advances-in-microphone-array-processing-and-multichannel-speech-enhancement-2502.09037</loc><lastmod>2025-02-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/advances-in-microphone-array-processing-and-multichannel-speech-enhancement-2502.09037"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/advances-in-microphone-array-processing-and-multichannel-speech-enhancement-2502.09037"/></url>
<url><loc>https://scifaro.com/en/abs/microphone-array-geometry-independent-multi-talker-distant-asr-ntt-system-for-the-dasr-task-of-the-chime-8-challenge-2502.09859</loc><lastmod>2025-06-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/microphone-array-geometry-independent-multi-talker-distant-asr-ntt-system-for-the-dasr-task-of-the-chime-8-challenge-2502.09859"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/microphone-array-geometry-independent-multi-talker-distant-asr-ntt-system-for-the-dasr-task-of-the-chime-8-challenge-2502.09859"/></url>
<url><loc>https://scifaro.com/en/abs/musical-score-following-using-statistical-inference-2502.10426</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-score-following-using-statistical-inference-2502.10426"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-score-following-using-statistical-inference-2502.10426"/></url>
<url><loc>https://scifaro.com/en/abs/mohave-mixture-of-hierarchical-audio-visual-experts-for-robust-speech-recognition-2502.10447</loc><lastmod>2025-05-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mohave-mixture-of-hierarchical-audio-visual-experts-for-robust-speech-recognition-2502.10447"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mohave-mixture-of-hierarchical-audio-visual-experts-for-robust-speech-recognition-2502.10447"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-age-related-robustness-in-children-speaker-verification-2502.10511</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-age-related-robustness-in-children-speaker-verification-2502.10511"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-age-related-robustness-in-children-speaker-verification-2502.10511"/></url>
<url><loc>https://scifaro.com/en/abs/neuroamp-a-novel-end-to-end-general-purpose-deep-neural-amplifier-for-personalized-hearing-aids-2502.10822</loc><lastmod>2025-09-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/neuroamp-a-novel-end-to-end-general-purpose-deep-neural-amplifier-for-personalized-hearing-aids-2502.10822"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/neuroamp-a-novel-end-to-end-general-purpose-deep-neural-amplifier-for-personalized-hearing-aids-2502.10822"/></url>
<url><loc>https://scifaro.com/en/abs/generalizable-speech-deepfake-detection-via-meta-learned-lora-2502.10838</loc><lastmod>2025-08-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generalizable-speech-deepfake-detection-via-meta-learned-lora-2502.10838"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generalizable-speech-deepfake-detection-via-meta-learned-lora-2502.10838"/></url>
<url><loc>https://scifaro.com/en/abs/speecht-rag-reliable-depression-detection-in-llms-with-retrieval-augmented-generation-using-speech-timing-information-2502.10950</loc><lastmod>2025-05-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speecht-rag-reliable-depression-detection-in-llms-with-retrieval-augmented-generation-using-speech-timing-information-2502.10950"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speecht-rag-reliable-depression-detection-in-llms-with-retrieval-augmented-generation-using-speech-timing-information-2502.10950"/></url>
<url><loc>https://scifaro.com/en/abs/audiospa-spatializing-sound-events-with-text-2502.11219</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audiospa-spatializing-sound-events-with-text-2502.11219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audiospa-spatializing-sound-events-with-text-2502.11219"/></url>
<url><loc>https://scifaro.com/en/abs/lmfca-net-a-lightweight-model-for-multi-channel-speech-enhancement-with-efficient-narrow-band-and-cross-band-attention-2502.11462</loc><lastmod>2025-02-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lmfca-net-a-lightweight-model-for-multi-channel-speech-enhancement-with-efficient-narrow-band-and-cross-band-attention-2502.11462"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lmfca-net-a-lightweight-model-for-multi-channel-speech-enhancement-with-efficient-narrow-band-and-cross-band-attention-2502.11462"/></url>
<url><loc>https://scifaro.com/en/abs/improving-rare-word-recognition-of-whisper-in-zero-shot-settings-2502.11572</loc><lastmod>2025-02-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/improving-rare-word-recognition-of-whisper-in-zero-shot-settings-2502.11572"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/improving-rare-word-recognition-of-whisper-in-zero-shot-settings-2502.11572"/></url>
<url><loc>https://scifaro.com/en/abs/a-comprehensive-survey-on-generative-ai-for-video-to-music-generation-2502.12489</loc><lastmod>2025-12-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-generative-ai-for-video-to-music-generation-2502.12489"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comprehensive-survey-on-generative-ai-for-video-to-music-generation-2502.12489"/></url>
<url><loc>https://scifaro.com/en/abs/adopting-whisper-for-confidence-estimation-2502.13446</loc><lastmod>2025-02-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adopting-whisper-for-confidence-estimation-2502.13446"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adopting-whisper-for-confidence-estimation-2502.13446"/></url>
<url><loc>https://scifaro.com/en/abs/multi-channel-replay-speech-detection-using-an-adaptive-learnable-beamformer-2502.13473</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-channel-replay-speech-detection-using-an-adaptive-learnable-beamformer-2502.13473"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-channel-replay-speech-detection-using-an-adaptive-learnable-beamformer-2502.13473"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-automatic-speech-recognition-coupled-llm-modules-for-medical-diagnostics-2502.13982</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-automatic-speech-recognition-coupled-llm-modules-for-medical-diagnostics-2502.13982"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-automatic-speech-recognition-coupled-llm-modules-for-medical-diagnostics-2502.13982"/></url>
<url><loc>https://scifaro.com/en/abs/gesture-aware-zero-shot-speech-recognition-for-patients-with-language-disorders-2502.13983</loc><lastmod>2025-02-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/gesture-aware-zero-shot-speech-recognition-for-patients-with-language-disorders-2502.13983"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/gesture-aware-zero-shot-speech-recognition-for-patients-with-language-disorders-2502.13983"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-convolution-for-cnn-based-speech-enhancement-models-2502.14224</loc><lastmod>2025-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-convolution-for-cnn-based-speech-enhancement-models-2502.14224"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-convolution-for-cnn-based-speech-enhancement-models-2502.14224"/></url>
<url><loc>https://scifaro.com/en/abs/role-of-the-pretraining-and-the-adaptation-data-sizes-for-low-resource-real-time-mri-video-segmentation-2502.14418</loc><lastmod>2025-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/role-of-the-pretraining-and-the-adaptation-data-sizes-for-low-resource-real-time-mri-video-segmentation-2502.14418"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/role-of-the-pretraining-and-the-adaptation-data-sizes-for-low-resource-real-time-mri-video-segmentation-2502.14418"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-speech-large-language-models-with-prompt-aware-mixture-of-audio-encoders-2502.15178</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-speech-large-language-models-with-prompt-aware-mixture-of-audio-encoders-2502.15178"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-speech-large-language-models-with-prompt-aware-mixture-of-audio-encoders-2502.15178"/></url>
<url><loc>https://scifaro.com/en/abs/multizone-sound-field-reproduction-with-direction-of-arrival-distribution-based-regularization-and-its-application-to-binaural-centered-mode-matching-2502.16213</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multizone-sound-field-reproduction-with-direction-of-arrival-distribution-based-regularization-and-its-application-to-binaural-centered-mode-matching-2502.16213"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multizone-sound-field-reproduction-with-direction-of-arrival-distribution-based-regularization-and-its-application-to-binaural-centered-mode-matching-2502.16213"/></url>
<url><loc>https://scifaro.com/en/abs/speech-enhancement-using-continuous-embeddings-of-neural-audio-codec-2502.16240</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/speech-enhancement-using-continuous-embeddings-of-neural-audio-codec-2502.16240"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/speech-enhancement-using-continuous-embeddings-of-neural-audio-codec-2502.16240"/></url>
<url><loc>https://scifaro.com/en/abs/voc2vec-a-foundation-model-for-non-verbal-vocalization-2502.16298</loc><lastmod>2025-02-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/voc2vec-a-foundation-model-for-non-verbal-vocalization-2502.16298"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/voc2vec-a-foundation-model-for-non-verbal-vocalization-2502.16298"/></url>
<url><loc>https://scifaro.com/en/abs/balancing-speech-understanding-and-generation-using-continual-pre-training-for-codec-based-speech-llm-2502.16897</loc><lastmod>2025-12-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/balancing-speech-understanding-and-generation-using-continual-pre-training-for-codec-based-speech-llm-2502.16897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/balancing-speech-understanding-and-generation-using-continual-pre-training-for-codec-based-speech-llm-2502.16897"/></url>
<url><loc>https://scifaro.com/en/abs/megatts-3-sparse-alignment-enhanced-latent-diffusion-transformer-for-zero-shot-speech-synthesis-2502.18924</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/megatts-3-sparse-alignment-enhanced-latent-diffusion-transformer-for-zero-shot-speech-synthesis-2502.18924"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/megatts-3-sparse-alignment-enhanced-latent-diffusion-transformer-for-zero-shot-speech-synthesis-2502.18924"/></url>
<url><loc>https://scifaro.com/en/abs/primek-net-multi-scale-spectral-learning-via-group-prime-kernel-convolutional-neural-networks-for-single-channel-speech-enhancement-2502.19906</loc><lastmod>2025-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/primek-net-multi-scale-spectral-learning-via-group-prime-kernel-convolutional-neural-networks-for-single-channel-speech-enhancement-2502.19906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/primek-net-multi-scale-spectral-learning-via-group-prime-kernel-convolutional-neural-networks-for-single-channel-speech-enhancement-2502.19906"/></url>
<url><loc>https://scifaro.com/en/abs/cleanmel-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2502.20040</loc><lastmod>2025-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cleanmel-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2502.20040"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cleanmel-mel-spectrogram-enhancement-for-improving-both-speech-quality-and-asr-2502.20040"/></url>
<url><loc>https://scifaro.com/en/abs/unicodec-unified-audio-codec-with-single-domain-adaptive-codebook-2502.20067</loc><lastmod>2025-02-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unicodec-unified-audio-codec-with-single-domain-adaptive-codebook-2502.20067"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unicodec-unified-audio-codec-with-single-domain-adaptive-codebook-2502.20067"/></url>
<url><loc>https://scifaro.com/en/abs/jitter-jigsaw-temporal-transformer-for-event-reconstruction-for-self-supervised-sound-event-detection-2502.20857</loc><lastmod>2025-03-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/jitter-jigsaw-temporal-transformer-for-event-reconstruction-for-self-supervised-sound-event-detection-2502.20857"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/jitter-jigsaw-temporal-transformer-for-event-reconstruction-for-self-supervised-sound-event-detection-2502.20857"/></url>
<url><loc>https://scifaro.com/en/abs/ul-unas-ultra-lightweight-u-nets-for-real-time-speech-enhancement-via-network-architecture-search-2503.00340</loc><lastmod>2026-02-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ul-unas-ultra-lightweight-u-nets-for-real-time-speech-enhancement-via-network-architecture-search-2503.00340"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ul-unas-ultra-lightweight-u-nets-for-real-time-speech-enhancement-via-network-architecture-search-2503.00340"/></url>
<url><loc>https://scifaro.com/en/abs/llase-g1-incentivizing-generalization-capability-for-llama-based-speech-enhancement-2503.00493</loc><lastmod>2025-06-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/llase-g1-incentivizing-generalization-capability-for-llama-based-speech-enhancement-2503.00493"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/llase-g1-incentivizing-generalization-capability-for-llama-based-speech-enhancement-2503.00493"/></url>
<url><loc>https://scifaro.com/en/abs/uniwav-towards-unified-pre-training-for-speech-representation-learning-and-generation-2503.00733</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uniwav-towards-unified-pre-training-for-speech-representation-learning-and-generation-2503.00733"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uniwav-towards-unified-pre-training-for-speech-representation-learning-and-generation-2503.00733"/></url>
<url><loc>https://scifaro.com/en/abs/diffrhythm-blazingly-fast-and-embarrassingly-simple-end-to-end-full-length-song-generation-with-latent-diffusion-2503.01183</loc><lastmod>2025-03-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffrhythm-blazingly-fast-and-embarrassingly-simple-end-to-end-full-length-song-generation-with-latent-diffusion-2503.01183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffrhythm-blazingly-fast-and-embarrassingly-simple-end-to-end-full-length-song-generation-with-latent-diffusion-2503.01183"/></url>
<url><loc>https://scifaro.com/en/abs/cnn-based-robust-sound-source-localization-with-srp-phat-for-the-extreme-edge-2503.02046</loc><lastmod>2025-03-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cnn-based-robust-sound-source-localization-with-srp-phat-for-the-extreme-edge-2503.02046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cnn-based-robust-sound-source-localization-with-srp-phat-for-the-extreme-edge-2503.02046"/></url>
<url><loc>https://scifaro.com/en/abs/harp-2-0-expanding-hosted-asynchronous-remote-processing-for-deep-learning-in-the-daw-2503.02977</loc><lastmod>2025-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/harp-2-0-expanding-hosted-asynchronous-remote-processing-for-deep-learning-in-the-daw-2503.02977"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/harp-2-0-expanding-hosted-asynchronous-remote-processing-for-deep-learning-in-the-daw-2503.02977"/></url>
<url><loc>https://scifaro.com/en/abs/good-practices-for-evaluation-of-synthesized-speech-2503.03250</loc><lastmod>2025-10-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/good-practices-for-evaluation-of-synthesized-speech-2503.03250"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/good-practices-for-evaluation-of-synthesized-speech-2503.03250"/></url>
<url><loc>https://scifaro.com/en/abs/on-the-relation-between-speech-quality-and-quantized-latent-representations-of-neural-codecs-2503.03304</loc><lastmod>2025-03-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-the-relation-between-speech-quality-and-quantized-latent-representations-of-neural-codecs-2503.03304"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-the-relation-between-speech-quality-and-quantized-latent-representations-of-neural-codecs-2503.03304"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-analysis-of-generalised-echo-and-interference-cancelling-and-extended-multichannel-wiener-filtering-for-combined-noise-reduction-and-acoustic-echo-cancellation-2503.03593</loc><lastmod>2026-01-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-analysis-of-generalised-echo-and-interference-cancelling-and-extended-multichannel-wiener-filtering-for-combined-noise-reduction-and-acoustic-echo-cancellation-2503.03593"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-analysis-of-generalised-echo-and-interference-cancelling-and-extended-multichannel-wiener-filtering-for-combined-noise-reduction-and-acoustic-echo-cancellation-2503.03593"/></url>
<url><loc>https://scifaro.com/en/abs/scaling-rich-style-prompted-text-to-speech-datasets-2503.04713</loc><lastmod>2025-09-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/scaling-rich-style-prompted-text-to-speech-datasets-2503.04713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/scaling-rich-style-prompted-text-to-speech-datasets-2503.04713"/></url>
<url><loc>https://scifaro.com/en/abs/from-voice-to-safety-language-ai-powered-pilot-atc-communication-understanding-for-airport-surface-movement-collision-risk-assessment-2503.04974</loc><lastmod>2025-10-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-voice-to-safety-language-ai-powered-pilot-atc-communication-understanding-for-airport-surface-movement-collision-risk-assessment-2503.04974"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-voice-to-safety-language-ai-powered-pilot-atc-communication-understanding-for-airport-surface-movement-collision-risk-assessment-2503.04974"/></url>
<url><loc>https://scifaro.com/en/abs/musical-source-separation-of-brazilian-percussion-2503.04995</loc><lastmod>2025-07-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/musical-source-separation-of-brazilian-percussion-2503.04995"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/musical-source-separation-of-brazilian-percussion-2503.04995"/></url>
<url><loc>https://scifaro.com/en/abs/prose-diffusion-priors-for-speech-enhancement-2503.06375</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/prose-diffusion-priors-for-speech-enhancement-2503.06375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/prose-diffusion-priors-for-speech-enhancement-2503.06375"/></url>
<url><loc>https://scifaro.com/en/abs/why-pre-trained-models-fail-feature-entanglement-in-multi-modal-depression-detection-2503.06620</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/why-pre-trained-models-fail-feature-entanglement-in-multi-modal-depression-detection-2503.06620"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/why-pre-trained-models-fail-feature-entanglement-in-multi-modal-depression-detection-2503.06620"/></url>
<url><loc>https://scifaro.com/en/abs/score-informed-music-source-separation-improving-synthetic-to-real-generalization-in-classical-music-2503.07352</loc><lastmod>2025-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/score-informed-music-source-separation-improving-synthetic-to-real-generalization-in-classical-music-2503.07352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/score-informed-music-source-separation-improving-synthetic-to-real-generalization-in-classical-music-2503.07352"/></url>
<url><loc>https://scifaro.com/en/abs/impact-of-microphone-array-mismatches-to-learning-based-replay-speech-detection-2503.07357</loc><lastmod>2025-12-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impact-of-microphone-array-mismatches-to-learning-based-replay-speech-detection-2503.07357"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impact-of-microphone-array-mismatches-to-learning-based-replay-speech-detection-2503.07357"/></url>
<url><loc>https://scifaro.com/en/abs/building-english-asr-model-with-regional-language-support-2503.07522</loc><lastmod>2025-03-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/building-english-asr-model-with-regional-language-support-2503.07522"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/building-english-asr-model-with-regional-language-support-2503.07522"/></url>
<url><loc>https://scifaro.com/en/abs/yue-scaling-open-foundation-models-for-long-form-music-generation-2503.08638</loc><lastmod>2025-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/yue-scaling-open-foundation-models-for-long-form-music-generation-2503.08638"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/yue-scaling-open-foundation-models-for-long-form-music-generation-2503.08638"/></url>
<url><loc>https://scifaro.com/en/abs/an-exhaustive-evaluation-of-tts-and-vc-based-data-augmentation-for-asr-2503.08954</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/an-exhaustive-evaluation-of-tts-and-vc-based-data-augmentation-for-asr-2503.08954"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/an-exhaustive-evaluation-of-tts-and-vc-based-data-augmentation-for-asr-2503.08954"/></url>
<url><loc>https://scifaro.com/en/abs/multiple-speaker-separation-from-noisy-sources-in-reverberant-rooms-using-relative-transfer-matrix-2503.09412</loc><lastmod>2025-03-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multiple-speaker-separation-from-noisy-sources-in-reverberant-rooms-using-relative-transfer-matrix-2503.09412"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multiple-speaker-separation-from-noisy-sources-in-reverberant-rooms-using-relative-transfer-matrix-2503.09412"/></url>
<url><loc>https://scifaro.com/en/abs/valsub-subsampling-validation-data-to-mitigate-forgetting-during-asr-personalization-2503.09906</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/valsub-subsampling-validation-data-to-mitigate-forgetting-during-asr-personalization-2503.09906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/valsub-subsampling-validation-data-to-mitigate-forgetting-during-asr-personalization-2503.09906"/></url>
<url><loc>https://scifaro.com/en/abs/sound-field-estimation-theories-and-applications-2503.10016</loc><lastmod>2025-03-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sound-field-estimation-theories-and-applications-2503.10016"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sound-field-estimation-theories-and-applications-2503.10016"/></url>
<url><loc>https://scifaro.com/en/abs/bilingual-dual-head-deep-model-for-parkinson-s-disease-detection-from-speech-2503.10301</loc><lastmod>2025-11-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bilingual-dual-head-deep-model-for-parkinson-s-disease-detection-from-speech-2503.10301"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bilingual-dual-head-deep-model-for-parkinson-s-disease-detection-from-speech-2503.10301"/></url>
<url><loc>https://scifaro.com/en/abs/handling-domain-shifts-for-anomalous-sound-detection-a-review-of-dcase-related-work-2503.10435</loc><lastmod>2025-09-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/handling-domain-shifts-for-anomalous-sound-detection-a-review-of-dcase-related-work-2503.10435"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/handling-domain-shifts-for-anomalous-sound-detection-a-review-of-dcase-related-work-2503.10435"/></url>
<url><loc>https://scifaro.com/en/abs/eeg-based-decoding-of-sound-location-comparing-free-field-to-headphone-based-non-individual-hrtfs-2503.10783</loc><lastmod>2025-03-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/eeg-based-decoding-of-sound-location-comparing-free-field-to-headphone-based-non-individual-hrtfs-2503.10783"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/eeg-based-decoding-of-sound-location-comparing-free-field-to-headphone-based-non-individual-hrtfs-2503.10783"/></url>
<url><loc>https://scifaro.com/en/abs/mavflow-preserving-paralinguistic-elements-with-conditional-flow-matching-for-zero-shot-av2av-multilingual-translation-2503.11026</loc><lastmod>2025-07-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mavflow-preserving-paralinguistic-elements-with-conditional-flow-matching-for-zero-shot-av2av-multilingual-translation-2503.11026"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mavflow-preserving-paralinguistic-elements-with-conditional-flow-matching-for-zero-shot-av2av-multilingual-translation-2503.11026"/></url>
<url><loc>https://scifaro.com/en/abs/adaptive-mixture-of-low-rank-experts-for-robust-audio-spoofing-detection-2503.12010</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/adaptive-mixture-of-low-rank-experts-for-robust-audio-spoofing-detection-2503.12010"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/adaptive-mixture-of-low-rank-experts-for-robust-audio-spoofing-detection-2503.12010"/></url>
<url><loc>https://scifaro.com/en/abs/fnse-sbgan-far-field-speech-enhancement-with-schrodinger-bridge-and-generative-adversarial-networks-2503.12936</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fnse-sbgan-far-field-speech-enhancement-with-schrodinger-bridge-and-generative-adversarial-networks-2503.12936"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fnse-sbgan-far-field-speech-enhancement-with-schrodinger-bridge-and-generative-adversarial-networks-2503.12936"/></url>
<url><loc>https://scifaro.com/en/abs/past-present-and-future-of-spatial-audio-and-room-acoustics-2503.12948</loc><lastmod>2025-03-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/past-present-and-future-of-spatial-audio-and-room-acoustics-2503.12948"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/past-present-and-future-of-spatial-audio-and-room-acoustics-2503.12948"/></url>
<url><loc>https://scifaro.com/en/abs/variational-autoencoder-for-personalized-pathological-speech-enhancement-2503.14036</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/variational-autoencoder-for-personalized-pathological-speech-enhancement-2503.14036"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/variational-autoencoder-for-personalized-pathological-speech-enhancement-2503.14036"/></url>
<url><loc>https://scifaro.com/en/abs/room-impulse-response-estimation-through-optimal-mass-transport-barycenters-2503.14207</loc><lastmod>2025-03-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/room-impulse-response-estimation-through-optimal-mass-transport-barycenters-2503.14207"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/room-impulse-response-estimation-through-optimal-mass-transport-barycenters-2503.14207"/></url>
<url><loc>https://scifaro.com/en/abs/mooncast-high-quality-zero-shot-podcast-generation-2503.14345</loc><lastmod>2025-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mooncast-high-quality-zero-shot-podcast-generation-2503.14345"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mooncast-high-quality-zero-shot-podcast-generation-2503.14345"/></url>
<url><loc>https://scifaro.com/en/abs/analysis-and-extension-of-noisy-target-training-for-unsupervised-target-signal-enhancement-2503.14854</loc><lastmod>2025-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analysis-and-extension-of-noisy-target-training-for-unsupervised-target-signal-enhancement-2503.14854"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analysis-and-extension-of-noisy-target-training-for-unsupervised-target-signal-enhancement-2503.14854"/></url>
<url><loc>https://scifaro.com/en/abs/solla-towards-a-speech-oriented-llm-that-hears-acoustic-context-2503.15338</loc><lastmod>2025-03-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/solla-towards-a-speech-oriented-llm-that-hears-acoustic-context-2503.15338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/solla-towards-a-speech-oriented-llm-that-hears-acoustic-context-2503.15338"/></url>
<url><loc>https://scifaro.com/en/abs/a-speech-production-model-for-radar-connecting-speech-acoustics-with-radar-measured-vibrations-2503.15627</loc><lastmod>2025-03-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-speech-production-model-for-radar-connecting-speech-acoustics-with-radar-measured-vibrations-2503.15627"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-speech-production-model-for-radar-connecting-speech-acoustics-with-radar-measured-vibrations-2503.15627"/></url>
<url><loc>https://scifaro.com/en/abs/from-faces-to-voices-learning-hierarchical-representations-for-high-quality-video-to-speech-2503.16956</loc><lastmod>2025-03-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-faces-to-voices-learning-hierarchical-representations-for-high-quality-video-to-speech-2503.16956"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-faces-to-voices-learning-hierarchical-representations-for-high-quality-video-to-speech-2503.16956"/></url>
<url><loc>https://scifaro.com/en/abs/a-state-of-the-art-review-on-acoustic-preservation-of-historical-worship-spaces-through-auralization-2503.18022</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-state-of-the-art-review-on-acoustic-preservation-of-historical-worship-spaces-through-auralization-2503.18022"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-state-of-the-art-review-on-acoustic-preservation-of-historical-worship-spaces-through-auralization-2503.18022"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-variational-acoustic-clustering-2503.18579</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-variational-acoustic-clustering-2503.18579"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-variational-acoustic-clustering-2503.18579"/></url>
<url><loc>https://scifaro.com/en/abs/target-speaker-selection-for-neural-network-beamforming-in-multi-speaker-scenarios-2503.18590</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/target-speaker-selection-for-neural-network-beamforming-in-multi-speaker-scenarios-2503.18590"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/target-speaker-selection-for-neural-network-beamforming-in-multi-speaker-scenarios-2503.18590"/></url>
<url><loc>https://scifaro.com/en/abs/joint-spectrogram-separation-and-tdoa-estimation-using-optimal-transport-2503.18600</loc><lastmod>2025-03-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/joint-spectrogram-separation-and-tdoa-estimation-using-optimal-transport-2503.18600"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/joint-spectrogram-separation-and-tdoa-estimation-using-optimal-transport-2503.18600"/></url>
<url><loc>https://scifaro.com/en/abs/pitch-contour-exploration-across-audio-domains-a-vision-based-transfer-learning-approach-2503.19161</loc><lastmod>2025-03-26</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pitch-contour-exploration-across-audio-domains-a-vision-based-transfer-learning-approach-2503.19161"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pitch-contour-exploration-across-audio-domains-a-vision-based-transfer-learning-approach-2503.19161"/></url>
<url><loc>https://scifaro.com/en/abs/qualispeech-a-speech-quality-assessment-dataset-with-natural-language-reasoning-and-descriptions-2503.20290</loc><lastmod>2025-06-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qualispeech-a-speech-quality-assessment-dataset-with-natural-language-reasoning-and-descriptions-2503.20290"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qualispeech-a-speech-quality-assessment-dataset-with-natural-language-reasoning-and-descriptions-2503.20290"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-machine-learning-methods-for-distributed-acoustic-sensing-2503.20681</loc><lastmod>2025-03-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-machine-learning-methods-for-distributed-acoustic-sensing-2503.20681"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-machine-learning-methods-for-distributed-acoustic-sensing-2503.20681"/></url>
<url><loc>https://scifaro.com/en/abs/expressive-timing-in-hindustani-vocal-music-2503.21142</loc><lastmod>2025-03-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expressive-timing-in-hindustani-vocal-music-2503.21142"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expressive-timing-in-hindustani-vocal-music-2503.21142"/></url>
<url><loc>https://scifaro.com/en/abs/lend-a-hand-semi-training-free-cued-speech-recognition-via-mllm-driven-hand-modeling-for-barrier-free-communication-2503.21785</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lend-a-hand-semi-training-free-cued-speech-recognition-via-mllm-driven-hand-modeling-for-barrier-free-communication-2503.21785"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lend-a-hand-semi-training-free-cued-speech-recognition-via-mllm-driven-hand-modeling-for-barrier-free-communication-2503.21785"/></url>
<url><loc>https://scifaro.com/en/abs/baseline-systems-and-evaluation-metrics-for-spatial-semantic-segmentation-of-sound-scenes-2503.22088</loc><lastmod>2025-06-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/baseline-systems-and-evaluation-metrics-for-spatial-semantic-segmentation-of-sound-scenes-2503.22088"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/baseline-systems-and-evaluation-metrics-for-spatial-semantic-segmentation-of-sound-scenes-2503.22088"/></url>
<url><loc>https://scifaro.com/en/abs/m2d-clap-exploring-general-purpose-audio-language-representations-beyond-clap-2503.22104</loc><lastmod>2025-09-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/m2d-clap-exploring-general-purpose-audio-language-representations-beyond-clap-2503.22104"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/m2d-clap-exploring-general-purpose-audio-language-representations-beyond-clap-2503.22104"/></url>
<url><loc>https://scifaro.com/en/abs/make-some-noise-towards-llm-audio-reasoning-and-generation-using-sound-tokens-2503.22275</loc><lastmod>2025-03-31</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/make-some-noise-towards-llm-audio-reasoning-and-generation-using-sound-tokens-2503.22275"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/make-some-noise-towards-llm-audio-reasoning-and-generation-using-sound-tokens-2503.22275"/></url>
<url><loc>https://scifaro.com/en/abs/qieemo-speech-is-all-you-need-in-the-emotion-recognition-in-conversations-2503.22687</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/qieemo-speech-is-all-you-need-in-the-emotion-recognition-in-conversations-2503.22687"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/qieemo-speech-is-all-you-need-in-the-emotion-recognition-in-conversations-2503.22687"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-aviation-communication-transcription-fine-tuning-distil-whisper-with-lora-2503.22692</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-aviation-communication-transcription-fine-tuning-distil-whisper-with-lora-2503.22692"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-aviation-communication-transcription-fine-tuning-distil-whisper-with-lora-2503.22692"/></url>
<url><loc>https://scifaro.com/en/abs/audio-compression-using-periodic-gabor-with-biorthogonal-exchange-implementation-using-the-zak-transform-2503.22703</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/audio-compression-using-periodic-gabor-with-biorthogonal-exchange-implementation-using-the-zak-transform-2503.22703"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/audio-compression-using-periodic-gabor-with-biorthogonal-exchange-implementation-using-the-zak-transform-2503.22703"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-nonnative-speech-perception-and-production-through-an-ai-powered-application-2503.22705</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-nonnative-speech-perception-and-production-through-an-ai-powered-application-2503.22705"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-nonnative-speech-perception-and-production-through-an-ai-powered-application-2503.22705"/></url>
<url><loc>https://scifaro.com/en/abs/chirp-localization-via-fine-tuned-transformer-model-a-proof-of-concept-study-2503.22713</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/chirp-localization-via-fine-tuned-transformer-model-a-proof-of-concept-study-2503.22713"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/chirp-localization-via-fine-tuned-transformer-model-a-proof-of-concept-study-2503.22713"/></url>
<url><loc>https://scifaro.com/en/abs/congenital-heart-disease-classification-using-phonocardiograms-a-scalable-screening-tool-for-diverse-environments-2503.22773</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/congenital-heart-disease-classification-using-phonocardiograms-a-scalable-screening-tool-for-diverse-environments-2503.22773"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/congenital-heart-disease-classification-using-phonocardiograms-a-scalable-screening-tool-for-diverse-environments-2503.22773"/></url>
<url><loc>https://scifaro.com/en/abs/the-trajectorir-database-room-acoustic-recordings-along-a-trajectory-of-moving-microphones-2503.23004</loc><lastmod>2026-03-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-trajectorir-database-room-acoustic-recordings-along-a-trajectory-of-moving-microphones-2503.23004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-trajectorir-database-room-acoustic-recordings-along-a-trajectory-of-moving-microphones-2503.23004"/></url>
<url><loc>https://scifaro.com/en/abs/supertonictts-towards-highly-efficient-and-streamlined-text-to-speech-system-2503.23108</loc><lastmod>2025-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/supertonictts-towards-highly-efficient-and-streamlined-text-to-speech-system-2503.23108"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/supertonictts-towards-highly-efficient-and-streamlined-text-to-speech-system-2503.23108"/></url>
<url><loc>https://scifaro.com/en/abs/aurelia-test-time-reasoning-distillation-in-audio-visual-llms-2503.23219</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aurelia-test-time-reasoning-distillation-in-audio-visual-llms-2503.23219"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aurelia-test-time-reasoning-distillation-in-audio-visual-llms-2503.23219"/></url>
<url><loc>https://scifaro.com/en/abs/a-first-order-dirac-based-parametric-ambisonic-coder-for-immersive-communications-2503.23586</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-first-order-dirac-based-parametric-ambisonic-coder-for-immersive-communications-2503.23586"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-first-order-dirac-based-parametric-ambisonic-coder-for-immersive-communications-2503.23586"/></url>
<url><loc>https://scifaro.com/en/abs/aud-sur-an-audio-analyzer-assistant-for-audio-surveillance-applications-2503.23827</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aud-sur-an-audio-analyzer-assistant-for-audio-surveillance-applications-2503.23827"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aud-sur-an-audio-analyzer-assistant-for-audio-surveillance-applications-2503.23827"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-in-context-learning-capabilities-of-chatgpt-for-pathological-speech-detection-2503.23873</loc><lastmod>2025-04-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-in-context-learning-capabilities-of-chatgpt-for-pathological-speech-detection-2503.23873"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-in-context-learning-capabilities-of-chatgpt-for-pathological-speech-detection-2503.23873"/></url>
<url><loc>https://scifaro.com/en/abs/is-asmr-engineerable-a-signal-processing-and-user-experience-study-2504.00621</loc><lastmod>2026-04-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-asmr-engineerable-a-signal-processing-and-user-experience-study-2504.00621"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-asmr-engineerable-a-signal-processing-and-user-experience-study-2504.00621"/></url>
<url><loc>https://scifaro.com/en/abs/expanding-and-analyzing-odaq-the-open-dataset-of-audio-quality-2504.00742</loc><lastmod>2025-04-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/expanding-and-analyzing-odaq-the-open-dataset-of-audio-quality-2504.00742"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/expanding-and-analyzing-odaq-the-open-dataset-of-audio-quality-2504.00742"/></url>
<url><loc>https://scifaro.com/en/abs/spatial-filter-bank-based-neural-method-for-multichannel-speech-enhancement-2504.01392</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatial-filter-bank-based-neural-method-for-multichannel-speech-enhancement-2504.01392"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatial-filter-bank-based-neural-method-for-multichannel-speech-enhancement-2504.01392"/></url>
<url><loc>https://scifaro.com/en/abs/leveraging-embedding-techniques-in-multimodal-machine-learning-for-mental-illness-assessment-2504.01767</loc><lastmod>2025-04-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/leveraging-embedding-techniques-in-multimodal-machine-learning-for-mental-illness-assessment-2504.01767"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/leveraging-embedding-techniques-in-multimodal-machine-learning-for-mental-illness-assessment-2504.01767"/></url>
<url><loc>https://scifaro.com/en/abs/mind-the-prompt-prompting-strategies-in-audio-generations-for-improving-sound-classification-2504.03329</loc><lastmod>2025-04-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/mind-the-prompt-prompting-strategies-in-audio-generations-for-improving-sound-classification-2504.03329"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/mind-the-prompt-prompting-strategies-in-audio-generations-for-improving-sound-classification-2504.03329"/></url>
<url><loc>https://scifaro.com/en/abs/real-time-auralization-for-first-person-vocal-interaction-in-immersive-virtual-environments-2504.04075</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/real-time-auralization-for-first-person-vocal-interaction-in-immersive-virtual-environments-2504.04075"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/real-time-auralization-for-first-person-vocal-interaction-in-immersive-virtual-environments-2504.04075"/></url>
<url><loc>https://scifaro.com/en/abs/wavenet-volterra-neural-networks-for-active-noise-control-a-fully-causal-approach-2504.04450</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavenet-volterra-neural-networks-for-active-noise-control-a-fully-causal-approach-2504.04450"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavenet-volterra-neural-networks-for-active-noise-control-a-fully-causal-approach-2504.04450"/></url>
<url><loc>https://scifaro.com/en/abs/trainable-adaptive-score-normalization-for-automatic-speaker-verification-2504.04512</loc><lastmod>2025-04-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/trainable-adaptive-score-normalization-for-automatic-speaker-verification-2504.04512"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/trainable-adaptive-score-normalization-for-automatic-speaker-verification-2504.04512"/></url>
<url><loc>https://scifaro.com/en/abs/bridging-the-gap-between-continuous-and-informative-discrete-representations-by-random-product-quantization-2504.04721</loc><lastmod>2025-11-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/bridging-the-gap-between-continuous-and-informative-discrete-representations-by-random-product-quantization-2504.04721"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/bridging-the-gap-between-continuous-and-informative-discrete-representations-by-random-product-quantization-2504.04721"/></url>
<url><loc>https://scifaro.com/en/abs/unsupervised-estimation-of-nonlinear-audio-effects-comparing-diffusion-based-and-adversarial-approaches-2504.04751</loc><lastmod>2025-09-25</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/unsupervised-estimation-of-nonlinear-audio-effects-comparing-diffusion-based-and-adversarial-approaches-2504.04751"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/unsupervised-estimation-of-nonlinear-audio-effects-comparing-diffusion-based-and-adversarial-approaches-2504.04751"/></url>
<url><loc>https://scifaro.com/en/abs/nes2net-a-lightweight-nested-architecture-for-foundation-model-driven-speech-anti-spoofing-2504.05657</loc><lastmod>2025-10-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/nes2net-a-lightweight-nested-architecture-for-foundation-model-driven-speech-anti-spoofing-2504.05657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/nes2net-a-lightweight-nested-architecture-for-foundation-model-driven-speech-anti-spoofing-2504.05657"/></url>
<url><loc>https://scifaro.com/en/abs/rnn-transducer-based-losses-for-speech-recognition-on-noisy-targets-2504.06963</loc><lastmod>2025-04-10</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rnn-transducer-based-losses-for-speech-recognition-on-noisy-targets-2504.06963"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rnn-transducer-based-losses-for-speech-recognition-on-noisy-targets-2504.06963"/></url>
<url><loc>https://scifaro.com/en/abs/categorical-unsupervised-variational-acoustic-clustering-2504.07652</loc><lastmod>2026-01-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/categorical-unsupervised-variational-acoustic-clustering-2504.07652"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/categorical-unsupervised-variational-acoustic-clustering-2504.07652"/></url>
<url><loc>https://scifaro.com/en/abs/usm-vc-mitigating-timbre-leakage-with-universal-semantic-mapping-residual-block-for-voice-conversion-2504.08524</loc><lastmod>2025-06-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/usm-vc-mitigating-timbre-leakage-with-universal-semantic-mapping-residual-block-for-voice-conversion-2504.08524"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/usm-vc-mitigating-timbre-leakage-with-universal-semantic-mapping-residual-block-for-voice-conversion-2504.08524"/></url>
<url><loc>https://scifaro.com/en/abs/torchfx-a-modern-approach-to-audio-dsp-with-pytorch-and-gpu-acceleration-2504.08624</loc><lastmod>2025-04-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/torchfx-a-modern-approach-to-audio-dsp-with-pytorch-and-gpu-acceleration-2504.08624"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/torchfx-a-modern-approach-to-audio-dsp-with-pytorch-and-gpu-acceleration-2504.08624"/></url>
<url><loc>https://scifaro.com/en/abs/reverberation-based-features-for-sound-event-localization-and-detection-with-distance-estimation-2504.08644</loc><lastmod>2026-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/reverberation-based-features-for-sound-event-localization-and-detection-with-distance-estimation-2504.08644"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/reverberation-based-features-for-sound-event-localization-and-detection-with-distance-estimation-2504.08644"/></url>
<url><loc>https://scifaro.com/en/abs/beyond-global-metrics-a-fairness-analysis-for-interpretable-voice-disorder-detection-systems-2504.08997</loc><lastmod>2025-04-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/beyond-global-metrics-a-fairness-analysis-for-interpretable-voice-disorder-detection-systems-2504.08997"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/beyond-global-metrics-a-fairness-analysis-for-interpretable-voice-disorder-detection-systems-2504.08997"/></url>
<url><loc>https://scifaro.com/en/abs/sift-50m-a-large-scale-multilingual-dataset-for-speech-instruction-fine-tuning-2504.09081</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/sift-50m-a-large-scale-multilingual-dataset-for-speech-instruction-fine-tuning-2504.09081"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/sift-50m-a-large-scale-multilingual-dataset-for-speech-instruction-fine-tuning-2504.09081"/></url>
<url><loc>https://scifaro.com/en/abs/ditse-high-fidelity-generative-speech-enhancement-via-latent-diffusion-transformers-2504.09381</loc><lastmod>2025-09-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/ditse-high-fidelity-generative-speech-enhancement-via-latent-diffusion-transformers-2504.09381"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/ditse-high-fidelity-generative-speech-enhancement-via-latent-diffusion-transformers-2504.09381"/></url>
<url><loc>https://scifaro.com/en/abs/pseudo-autoregressive-neural-codec-language-models-for-efficient-zero-shot-text-to-speech-synthesis-2504.10352</loc><lastmod>2025-08-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/pseudo-autoregressive-neural-codec-language-models-for-efficient-zero-shot-text-to-speech-synthesis-2504.10352"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/pseudo-autoregressive-neural-codec-language-models-for-efficient-zero-shot-text-to-speech-synthesis-2504.10352"/></url>
<url><loc>https://scifaro.com/en/abs/respiratory-inhaler-sound-event-classification-using-self-supervised-learning-2504.11246</loc><lastmod>2025-04-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/respiratory-inhaler-sound-event-classification-using-self-supervised-learning-2504.11246"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/respiratory-inhaler-sound-event-classification-using-self-supervised-learning-2504.11246"/></url>
<url><loc>https://scifaro.com/en/abs/benchmarking-audio-deepfake-detection-robustness-in-real-world-communication-scenarios-2504.12423</loc><lastmod>2026-05-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/benchmarking-audio-deepfake-detection-robustness-in-real-world-communication-scenarios-2504.12423"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/benchmarking-audio-deepfake-detection-robustness-in-real-world-communication-scenarios-2504.12423"/></url>
<url><loc>https://scifaro.com/en/abs/temporal-attention-pooling-for-frequency-dynamic-convolution-in-sound-event-detection-2504.12670</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/temporal-attention-pooling-for-frequency-dynamic-convolution-in-sound-event-detection-2504.12670"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/temporal-attention-pooling-for-frequency-dynamic-convolution-in-sound-event-detection-2504.12670"/></url>
<url><loc>https://scifaro.com/en/abs/emovoice-llm-based-emotional-text-to-speech-model-with-freestyle-text-prompting-2504.12867</loc><lastmod>2025-08-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/emovoice-llm-based-emotional-text-to-speech-model-with-freestyle-text-prompting-2504.12867"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/emovoice-llm-based-emotional-text-to-speech-model-with-freestyle-text-prompting-2504.12867"/></url>
<url><loc>https://scifaro.com/en/abs/cst-former-multidimensional-attention-based-transformer-for-sound-event-localization-and-detection-in-real-scenes-2504.12870</loc><lastmod>2025-04-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/cst-former-multidimensional-attention-based-transformer-for-sound-event-localization-and-detection-in-real-scenes-2504.12870"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/cst-former-multidimensional-attention-based-transformer-for-sound-event-localization-and-detection-in-real-scenes-2504.12870"/></url>
<url><loc>https://scifaro.com/en/abs/modeling-l1-influence-on-l2-pronunciation-an-mfcc-based-framework-for-explainable-machine-learning-and-pedagogical-feedback-2504.13765</loc><lastmod>2025-04-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/modeling-l1-influence-on-l2-pronunciation-an-mfcc-based-framework-for-explainable-machine-learning-and-pedagogical-feedback-2504.13765"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/modeling-l1-influence-on-l2-pronunciation-an-mfcc-based-framework-for-explainable-machine-learning-and-pedagogical-feedback-2504.13765"/></url>
<url><loc>https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-2504.14183</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-2504.14183"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-first-voiceprivacy-attacker-challenge-2504.14183"/></url>
<url><loc>https://scifaro.com/en/abs/data-augmentation-using-neural-acoustic-fields-with-retrieval-augmented-pre-training-2504.14409</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/data-augmentation-using-neural-acoustic-fields-with-retrieval-augmented-pre-training-2504.14409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/data-augmentation-using-neural-acoustic-fields-with-retrieval-augmented-pre-training-2504.14409"/></url>
<url><loc>https://scifaro.com/en/abs/predicting-speech-intelligibility-in-older-adults-for-speech-enhancement-using-the-gammachirp-envelope-similarity-index-gesi-2504.14437</loc><lastmod>2025-10-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/predicting-speech-intelligibility-in-older-adults-for-speech-enhancement-using-the-gammachirp-envelope-similarity-index-gesi-2504.14437"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/predicting-speech-intelligibility-in-older-adults-for-speech-enhancement-using-the-gammachirp-envelope-similarity-index-gesi-2504.14437"/></url>
<url><loc>https://scifaro.com/en/abs/dnn-based-hrirs-identification-with-a-continuously-rotating-speaker-array-2504.14817</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dnn-based-hrirs-identification-with-a-continuously-rotating-speaker-array-2504.14817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dnn-based-hrirs-identification-with-a-continuously-rotating-speaker-array-2504.14817"/></url>
<url><loc>https://scifaro.com/en/abs/quantitative-measures-for-passive-sonar-texture-analysis-2504.14843</loc><lastmod>2026-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantitative-measures-for-passive-sonar-texture-analysis-2504.14843"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantitative-measures-for-passive-sonar-texture-analysis-2504.14843"/></url>
<url><loc>https://scifaro.com/en/abs/omniaudio-generating-spatial-audio-from-360-degree-video-2504.14906</loc><lastmod>2025-06-04</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omniaudio-generating-spatial-audio-from-360-degree-video-2504.14906"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omniaudio-generating-spatial-audio-from-360-degree-video-2504.14906"/></url>
<url><loc>https://scifaro.com/en/abs/stablequant-layer-adaptive-post-training-quantization-for-speech-foundation-models-2504.14915</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/stablequant-layer-adaptive-post-training-quantization-for-speech-foundation-models-2504.14915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/stablequant-layer-adaptive-post-training-quantization-for-speech-foundation-models-2504.14915"/></url>
<url><loc>https://scifaro.com/en/abs/on-feature-representations-for-marmoset-vocal-communication-analysis-2504.14981</loc><lastmod>2025-04-22</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/on-feature-representations-for-marmoset-vocal-communication-analysis-2504.14981"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/on-feature-representations-for-marmoset-vocal-communication-analysis-2504.14981"/></url>
<url><loc>https://scifaro.com/en/abs/exploring-the-user-experience-of-ai-assisted-sound-searching-systems-for-creative-workflows-2504.15575</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/exploring-the-user-experience-of-ai-assisted-sound-searching-systems-for-creative-workflows-2504.15575"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/exploring-the-user-experience-of-ai-assisted-sound-searching-systems-for-creative-workflows-2504.15575"/></url>
<url><loc>https://scifaro.com/en/abs/fadel-uncertainty-aware-fake-audio-detection-with-evidential-deep-learning-2504.15663</loc><lastmod>2025-04-23</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fadel-uncertainty-aware-fake-audio-detection-with-evidential-deep-learning-2504.15663"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fadel-uncertainty-aware-fake-audio-detection-with-evidential-deep-learning-2504.15663"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-audio-coding-a-40-year-historical-perspective-2504.16223</loc><lastmod>2025-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-audio-coding-a-40-year-historical-perspective-2504.16223"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-audio-coding-a-40-year-historical-perspective-2504.16223"/></url>
<url><loc>https://scifaro.com/en/abs/deep-data-driven-modeling-of-room-acoustics-literature-review-and-research-perspectives-2504.16289</loc><lastmod>2025-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/deep-data-driven-modeling-of-room-acoustics-literature-review-and-research-perspectives-2504.16289"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/deep-data-driven-modeling-of-room-acoustics-literature-review-and-research-perspectives-2504.16289"/></url>
<url><loc>https://scifaro.com/en/abs/socov-semi-orthogonal-parametric-pooling-of-covariance-matrix-for-speaker-recognition-2504.16441</loc><lastmod>2025-04-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/socov-semi-orthogonal-parametric-pooling-of-covariance-matrix-for-speaker-recognition-2504.16441"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/socov-semi-orthogonal-parametric-pooling-of-covariance-matrix-for-speaker-recognition-2504.16441"/></url>
<url><loc>https://scifaro.com/en/abs/generating-localized-audible-zones-using-a-single-channel-parametric-loudspeaker-2504.17440</loc><lastmod>2026-05-11</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/generating-localized-audible-zones-using-a-single-channel-parametric-loudspeaker-2504.17440"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/generating-localized-audible-zones-using-a-single-channel-parametric-loudspeaker-2504.17440"/></url>
<url><loc>https://scifaro.com/en/abs/assessing-the-utility-of-audio-foundation-models-for-heart-and-respiratory-sound-analysis-2504.18004</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/assessing-the-utility-of-audio-foundation-models-for-heart-and-respiratory-sound-analysis-2504.18004"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/assessing-the-utility-of-audio-foundation-models-for-heart-and-respiratory-sound-analysis-2504.18004"/></url>
<url><loc>https://scifaro.com/en/abs/dose-drum-one-shot-extraction-from-music-mixture-2504.18157</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/dose-drum-one-shot-extraction-from-music-mixture-2504.18157"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/dose-drum-one-shot-extraction-from-music-mixture-2504.18157"/></url>
<url><loc>https://scifaro.com/en/abs/kimi-audio-technical-report-2504.18425</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/kimi-audio-technical-report-2504.18425"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/kimi-audio-technical-report-2504.18425"/></url>
<url><loc>https://scifaro.com/en/abs/music-tempo-estimation-on-solo-instrumental-performance-2504.18502</loc><lastmod>2025-04-28</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/music-tempo-estimation-on-solo-instrumental-performance-2504.18502"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/music-tempo-estimation-on-solo-instrumental-performance-2504.18502"/></url>
<url><loc>https://scifaro.com/en/abs/multi-task-corrupted-prediction-for-learning-robust-audio-visual-speech-representation-2504.18539</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/multi-task-corrupted-prediction-for-learning-robust-audio-visual-speech-representation-2504.18539"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/multi-task-corrupted-prediction-for-learning-robust-audio-visual-speech-representation-2504.18539"/></url>
<url><loc>https://scifaro.com/en/abs/enhancing-cochlear-implant-signal-coding-with-scaled-dot-product-attention-2504.19046</loc><lastmod>2025-04-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/enhancing-cochlear-implant-signal-coding-with-scaled-dot-product-attention-2504.19046"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/enhancing-cochlear-implant-signal-coding-with-scaled-dot-product-attention-2504.19046"/></url>
<url><loc>https://scifaro.com/en/abs/versatile-framework-for-song-generation-with-prompt-based-control-2504.19062</loc><lastmod>2026-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/versatile-framework-for-song-generation-with-prompt-based-control-2504.19062"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/versatile-framework-for-song-generation-with-prompt-based-control-2504.19062"/></url>
<url><loc>https://scifaro.com/en/abs/a-comparative-study-on-positional-encoding-for-time-frequency-domain-dual-path-transformer-based-source-separation-models-2504.19605</loc><lastmod>2025-06-03</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-comparative-study-on-positional-encoding-for-time-frequency-domain-dual-path-transformer-based-source-separation-models-2504.19605"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-comparative-study-on-positional-encoding-for-time-frequency-domain-dual-path-transformer-based-source-separation-models-2504.19605"/></url>
<url><loc>https://scifaro.com/en/abs/towards-flow-matching-based-tts-without-classifier-free-guidance-2504.20334</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/towards-flow-matching-based-tts-without-classifier-free-guidance-2504.20334"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/towards-flow-matching-based-tts-without-classifier-free-guidance-2504.20334"/></url>
<url><loc>https://scifaro.com/en/abs/isdrama-immersive-spatial-drama-generation-through-multimodal-prompting-2504.20630</loc><lastmod>2026-02-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/isdrama-immersive-spatial-drama-generation-through-multimodal-prompting-2504.20630"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/isdrama-immersive-spatial-drama-generation-through-multimodal-prompting-2504.20630"/></url>
<url><loc>https://scifaro.com/en/abs/impairments-are-clustered-in-latents-of-deep-neural-network-based-speech-quality-models-2504.21528</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/impairments-are-clustered-in-latents-of-deep-neural-network-based-speech-quality-models-2504.21528"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/impairments-are-clustered-in-latents-of-deep-neural-network-based-speech-quality-models-2504.21528"/></url>
<url><loc>https://scifaro.com/en/abs/from-aesthetics-to-human-preferences-comparative-perspectives-of-evaluating-text-to-music-systems-2504.21815</loc><lastmod>2025-05-01</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-aesthetics-to-human-preferences-comparative-perspectives-of-evaluating-text-to-music-systems-2504.21815"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-aesthetics-to-human-preferences-comparative-perspectives-of-evaluating-text-to-music-systems-2504.21815"/></url>
<url><loc>https://scifaro.com/en/abs/discovering-phoneme-specific-critical-articulators-through-a-data-driven-approach-2505.00007</loc><lastmod>2025-05-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discovering-phoneme-specific-critical-articulators-through-a-data-driven-approach-2505.00007"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discovering-phoneme-specific-critical-articulators-through-a-data-driven-approach-2505.00007"/></url>
<url><loc>https://scifaro.com/en/abs/perceptual-implications-of-automatic-anonymization-in-pathological-speech-2505.00409</loc><lastmod>2026-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/perceptual-implications-of-automatic-anonymization-in-pathological-speech-2505.00409"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/perceptual-implications-of-automatic-anonymization-in-pathological-speech-2505.00409"/></url>
<url><loc>https://scifaro.com/en/abs/physics-informed-neural-network-driven-sparse-field-discretization-method-for-near-field-acoustic-holography-2505.00897</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/physics-informed-neural-network-driven-sparse-field-discretization-method-for-near-field-acoustic-holography-2505.00897"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/physics-informed-neural-network-driven-sparse-field-discretization-method-for-near-field-acoustic-holography-2505.00897"/></url>
<url><loc>https://scifaro.com/en/abs/how-much-to-dereverberate-low-latency-single-channel-speech-enhancement-in-distant-microphone-scenarios-2505.01338</loc><lastmod>2025-05-05</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/how-much-to-dereverberate-low-latency-single-channel-speech-enhancement-in-distant-microphone-scenarios-2505.01338"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/how-much-to-dereverberate-low-latency-single-channel-speech-enhancement-in-distant-microphone-scenarios-2505.01338"/></url>
<url><loc>https://scifaro.com/en/abs/transfer-learning-based-deep-residual-learning-for-speech-recognition-in-clean-and-noisy-environments-2505.01632</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/transfer-learning-based-deep-residual-learning-for-speech-recognition-in-clean-and-noisy-environments-2505.01632"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/transfer-learning-based-deep-residual-learning-for-speech-recognition-in-clean-and-noisy-environments-2505.01632"/></url>
<url><loc>https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-with-device-information-in-the-dcase-2025-challenge-2505.01747</loc><lastmod>2026-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-with-device-information-in-the-dcase-2025-challenge-2505.01747"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/low-complexity-acoustic-scene-classification-with-device-information-in-the-dcase-2025-challenge-2505.01747"/></url>
<url><loc>https://scifaro.com/en/abs/flower-flow-based-estimated-gaussian-guidance-for-general-speech-restoration-2505.01750</loc><lastmod>2025-05-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flower-flow-based-estimated-gaussian-guidance-for-general-speech-restoration-2505.01750"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flower-flow-based-estimated-gaussian-guidance-for-general-speech-restoration-2505.01750"/></url>
<url><loc>https://scifaro.com/en/abs/the-search-for-squawk-agile-modeling-in-bioacoustics-2505.03071</loc><lastmod>2025-06-12</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/the-search-for-squawk-agile-modeling-in-bioacoustics-2505.03071"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/the-search-for-squawk-agile-modeling-in-bioacoustics-2505.03071"/></url>
<url><loc>https://scifaro.com/en/abs/fairness-of-automatic-speech-recognition-in-cleft-lip-and-palate-speech-2505.03697</loc><lastmod>2025-05-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/fairness-of-automatic-speech-recognition-in-cleft-lip-and-palate-speech-2505.03697"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/fairness-of-automatic-speech-recognition-in-cleft-lip-and-palate-speech-2505.03697"/></url>
<url><loc>https://scifaro.com/en/abs/aliasing-reduction-in-neural-amp-modeling-by-smoothing-activations-2505.04082</loc><lastmod>2025-06-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/aliasing-reduction-in-neural-amp-modeling-by-smoothing-activations-2505.04082"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/aliasing-reduction-in-neural-amp-modeling-by-smoothing-activations-2505.04082"/></url>
<url><loc>https://scifaro.com/en/abs/robust-speech-recognition-with-schr-odinger-bridge-based-speech-enhancement-2505.04237</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/robust-speech-recognition-with-schr-odinger-bridge-based-speech-enhancement-2505.04237"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/robust-speech-recognition-with-schr-odinger-bridge-based-speech-enhancement-2505.04237"/></url>
<url><loc>https://scifaro.com/en/abs/discrete-optimal-transport-and-voice-conversion-2505.04382</loc><lastmod>2026-03-02</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/discrete-optimal-transport-and-voice-conversion-2505.04382"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/discrete-optimal-transport-and-voice-conversion-2505.04382"/></url>
<url><loc>https://scifaro.com/en/abs/recognizing-ornaments-in-vocal-indian-art-music-with-active-annotation-2505.04419</loc><lastmod>2025-05-09</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/recognizing-ornaments-in-vocal-indian-art-music-with-active-annotation-2505.04419"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/recognizing-ornaments-in-vocal-indian-art-music-with-active-annotation-2505.04419"/></url>
<url><loc>https://scifaro.com/en/abs/accelerating-audio-research-with-robotic-dummy-heads-2505.04548</loc><lastmod>2025-05-08</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/accelerating-audio-research-with-robotic-dummy-heads-2505.04548"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/accelerating-audio-research-with-robotic-dummy-heads-2505.04548"/></url>
<url><loc>https://scifaro.com/en/abs/from-dialect-gaps-to-identity-maps-tackling-variability-in-speaker-verification-2505.04629</loc><lastmod>2025-10-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/from-dialect-gaps-to-identity-maps-tackling-variability-in-speaker-verification-2505.04629"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/from-dialect-gaps-to-identity-maps-tackling-variability-in-speaker-verification-2505.04629"/></url>
<url><loc>https://scifaro.com/en/abs/listen-to-extract-onset-prompted-target-speaker-extraction-2505.05114</loc><lastmod>2025-11-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/listen-to-extract-onset-prompted-target-speaker-extraction-2505.05114"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/listen-to-extract-onset-prompted-target-speaker-extraction-2505.05114"/></url>
<url><loc>https://scifaro.com/en/abs/uncertainty-quantification-in-melody-estimation-using-histogram-representation-2505.05156</loc><lastmod>2025-11-07</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/uncertainty-quantification-in-melody-estimation-using-histogram-representation-2505.05156"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/uncertainty-quantification-in-melody-estimation-using-histogram-representation-2505.05156"/></url>
<url><loc>https://scifaro.com/en/abs/flexspeech-towards-stable-controllable-and-expressive-text-to-speech-2505.05159</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/flexspeech-towards-stable-controllable-and-expressive-text-to-speech-2505.05159"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/flexspeech-towards-stable-controllable-and-expressive-text-to-speech-2505.05159"/></url>
<url><loc>https://scifaro.com/en/abs/do-we-need-ema-for-diffusion-based-speech-enhancement-toward-a-magnitude-preserving-network-architecture-2505.05216</loc><lastmod>2026-01-30</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/do-we-need-ema-for-diffusion-based-speech-enhancement-toward-a-magnitude-preserving-network-architecture-2505.05216"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/do-we-need-ema-for-diffusion-based-speech-enhancement-toward-a-magnitude-preserving-network-architecture-2505.05216"/></url>
<url><loc>https://scifaro.com/en/abs/arraydps-unsupervised-blind-speech-separation-with-a-diffusion-prior-2505.05657</loc><lastmod>2025-06-18</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/arraydps-unsupervised-blind-speech-separation-with-a-diffusion-prior-2505.05657"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/arraydps-unsupervised-blind-speech-separation-with-a-diffusion-prior-2505.05657"/></url>
<url><loc>https://scifaro.com/en/abs/rade-a-neural-codec-for-transmitting-speech-over-hf-radio-channels-2505.06671</loc><lastmod>2025-07-29</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/rade-a-neural-codec-for-transmitting-speech-over-hf-radio-channels-2505.06671"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/rade-a-neural-codec-for-transmitting-speech-over-hf-radio-channels-2505.06671"/></url>
<url><loc>https://scifaro.com/en/abs/tacos-temporally-aligned-audio-captions-for-language-audio-pretraining-2505.07609</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/tacos-temporally-aligned-audio-captions-for-language-audio-pretraining-2505.07609"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/tacos-temporally-aligned-audio-captions-for-language-audio-pretraining-2505.07609"/></url>
<url><loc>https://scifaro.com/en/abs/diffused-responsibility-analyzing-the-energy-consumption-of-generative-text-to-audio-diffusion-models-2505.07615</loc><lastmod>2025-07-17</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/diffused-responsibility-analyzing-the-energy-consumption-of-generative-text-to-audio-diffusion-models-2505.07615"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/diffused-responsibility-analyzing-the-energy-consumption-of-generative-text-to-audio-diffusion-models-2505.07615"/></url>
<url><loc>https://scifaro.com/en/abs/is-mixit-really-unsuitable-for-correlated-sources-exploring-mixit-for-unsupervised-pre-training-in-music-source-separation-2505.07631</loc><lastmod>2025-05-13</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/is-mixit-really-unsuitable-for-correlated-sources-exploring-mixit-for-unsupervised-pre-training-in-music-source-separation-2505.07631"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/is-mixit-really-unsuitable-for-correlated-sources-exploring-mixit-for-unsupervised-pre-training-in-music-source-separation-2505.07631"/></url>
<url><loc>https://scifaro.com/en/abs/minimax-speech-intrinsic-zero-shot-text-to-speech-with-a-learnable-speaker-encoder-2505.07916</loc><lastmod>2025-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/minimax-speech-intrinsic-zero-shot-text-to-speech-with-a-learnable-speaker-encoder-2505.07916"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/minimax-speech-intrinsic-zero-shot-text-to-speech-with-a-learnable-speaker-encoder-2505.07916"/></url>
<url><loc>https://scifaro.com/en/abs/investigating-self-supervised-features-for-expressive-multilingual-voice-conversion-2505.08278</loc><lastmod>2025-05-14</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/investigating-self-supervised-features-for-expressive-multilingual-voice-conversion-2505.08278"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/investigating-self-supervised-features-for-expressive-multilingual-voice-conversion-2505.08278"/></url>
<url><loc>https://scifaro.com/en/abs/a-survey-of-deep-learning-for-complex-speech-spectrograms-2505.08694</loc><lastmod>2025-10-06</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/a-survey-of-deep-learning-for-complex-speech-spectrograms-2505.08694"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/a-survey-of-deep-learning-for-complex-speech-spectrograms-2505.08694"/></url>
<url><loc>https://scifaro.com/en/abs/granite-speech-open-source-speech-aware-llms-with-strong-english-asr-capabilities-2505.08699</loc><lastmod>2025-05-15</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/granite-speech-open-source-speech-aware-llms-with-strong-english-asr-capabilities-2505.08699"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/granite-speech-open-source-speech-aware-llms-with-strong-english-asr-capabilities-2505.08699"/></url>
<url><loc>https://scifaro.com/en/abs/omni-r1-do-you-really-need-audio-to-fine-tune-your-audio-llm-2505.09439</loc><lastmod>2025-11-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/omni-r1-do-you-really-need-audio-to-fine-tune-your-audio-llm-2505.09439"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/omni-r1-do-you-really-need-audio-to-fine-tune-your-audio-llm-2505.09439"/></url>
<url><loc>https://scifaro.com/en/abs/wavreward-spoken-dialogue-models-with-generalist-reward-evaluators-2505.09558</loc><lastmod>2025-09-24</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/wavreward-spoken-dialogue-models-with-generalist-reward-evaluators-2505.09558"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/wavreward-spoken-dialogue-models-with-generalist-reward-evaluators-2505.09558"/></url>
<url><loc>https://scifaro.com/en/abs/who-said-what-wsw-2-0-enhanced-automated-analysis-of-preschool-classroom-speech-2505.09972</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/who-said-what-wsw-2-0-enhanced-automated-analysis-of-preschool-classroom-speech-2505.09972"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/who-said-what-wsw-2-0-enhanced-automated-analysis-of-preschool-classroom-speech-2505.09972"/></url>
<url><loc>https://scifaro.com/en/abs/spatially-selective-active-noise-control-for-open-fitting-hearables-with-acausal-optimization-2505.10372</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/spatially-selective-active-noise-control-for-open-fitting-hearables-with-acausal-optimization-2505.10372"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/spatially-selective-active-noise-control-for-open-fitting-hearables-with-acausal-optimization-2505.10372"/></url>
<url><loc>https://scifaro.com/en/abs/quantized-approximate-signal-processing-qasp-towards-homomorphic-encryption-for-audio-2505.10500</loc><lastmod>2025-05-16</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/quantized-approximate-signal-processing-qasp-towards-homomorphic-encryption-for-audio-2505.10500"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/quantized-approximate-signal-processing-qasp-towards-homomorphic-encryption-for-audio-2505.10500"/></url>
<url><loc>https://scifaro.com/en/abs/songeval-a-benchmark-dataset-for-song-aesthetics-evaluation-2505.10793</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/songeval-a-benchmark-dataset-for-song-aesthetics-evaluation-2505.10793"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/songeval-a-benchmark-dataset-for-song-aesthetics-evaluation-2505.10793"/></url>
<url><loc>https://scifaro.com/en/abs/anti-aliasing-of-neural-distortion-effects-via-model-fine-tuning-2505.11375</loc><lastmod>2025-05-19</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/anti-aliasing-of-neural-distortion-effects-via-model-fine-tuning-2505.11375"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/anti-aliasing-of-neural-distortion-effects-via-model-fine-tuning-2505.11375"/></url>
<url><loc>https://scifaro.com/en/abs/lipdiffuser-lip-to-speech-generation-with-conditional-diffusion-models-2505.11391</loc><lastmod>2025-10-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/lipdiffuser-lip-to-speech-generation-with-conditional-diffusion-models-2505.11391"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/lipdiffuser-lip-to-speech-generation-with-conditional-diffusion-models-2505.11391"/></url>
<url><loc>https://scifaro.com/en/abs/analytickws-towards-exemplar-free-analytic-class-incremental-learning-for-small-footprint-keyword-spotting-2505.11817</loc><lastmod>2025-05-20</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/analytickws-towards-exemplar-free-analytic-class-incremental-learning-for-small-footprint-keyword-spotting-2505.11817"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/analytickws-towards-exemplar-free-analytic-class-incremental-learning-for-small-footprint-keyword-spotting-2505.11817"/></url>
<url><loc>https://scifaro.com/en/abs/revisiting-ssl-for-sound-event-detection-complementary-fusion-and-adaptive-post-processing-2505.11889</loc><lastmod>2025-08-27</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/revisiting-ssl-for-sound-event-detection-complementary-fusion-and-adaptive-post-processing-2505.11889"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/revisiting-ssl-for-sound-event-detection-complementary-fusion-and-adaptive-post-processing-2505.11889"/></url>
<url><loc>https://scifaro.com/en/abs/binaqual-a-full-reference-objective-localization-similarity-metric-for-binaural-audio-2505.11915</loc><lastmod>2025-10-21</lastmod><xhtml:link rel="alternate" hreflang="en" href="https://scifaro.com/en/abs/binaqual-a-full-reference-objective-localization-similarity-metric-for-binaural-audio-2505.11915"/><xhtml:link rel="alternate" hreflang="x-default" href="https://scifaro.com/en/abs/binaqual-a-full-reference-objective-localization-similarity-metric-for-binaural-audio-2505.11915"/></url>
</urlset>