🔉 fix: Normalize audio MIME types in STT format validation (#12674)

* fix: normalize audio MIME types in STT format validation

Use getFileExtensionFromMime() to normalize non-standard MIME types
(e.g. audio/x-m4a, audio/x-wav, audio/x-flac) before checking against
the accepted formats list in azureOpenAIProvider. This is the same class
of bug as #12608 (text/x-markdown), but for STT audio validation.

Only audio/ and video/ MIME prefixes are normalized to prevent
non-audio types from matching via the webm default fallback.

Export getFileExtensionFromMime for testability.

Fixes #12632

* fix: reject unknown audio subtypes in STT format validation

Use MIME_TO_EXTENSION_MAP for normalization instead of
getFileExtensionFromMime() which falls back to 'webm' for unrecognized
types. Gate raw subtype matching on audio/video prefix to prevent
non-audio types (e.g. text/webm) from passing validation.

Resolves Codex review comment about unknown subtypes silently passing.

---------

Co-authored-by: Tobias Jonas <t.jonas@innfactory.de>
This commit is contained in:
Danny Avila 2026-04-15 09:58:07 -04:00 committed by GitHub
parent 76e9543f99
commit 6183303653
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 123 additions and 4 deletions

View file

@ -238,9 +238,15 @@ class STTService {
}
const acceptedFormats = ['flac', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'];
const fileFormat = audioFile.mimetype.split('/')[1];
if (!acceptedFormats.includes(fileFormat)) {
throw new Error(`The audio file format ${fileFormat} is not accepted`);
const [mimePrefix, rawFormat = ''] = audioFile.mimetype.split('/');
const isAudioMime = mimePrefix === 'audio' || mimePrefix === 'video';
const isKnownMime = audioFile.mimetype in MIME_TO_EXTENSION_MAP;
const normalizedFormat = isKnownMime ? MIME_TO_EXTENSION_MAP[audioFile.mimetype] : null;
if (
!acceptedFormats.includes(normalizedFormat) &&
!(isAudioMime && acceptedFormats.includes(rawFormat))
) {
throw new Error(`The audio file format ${rawFormat} is not accepted`);
}
const formData = new FormData();
@ -377,4 +383,4 @@ async function speechToText(req, res) {
await sttService.processSpeechToText(req, res);
}
module.exports = { STTService, speechToText };
module.exports = { STTService, speechToText, getFileExtensionFromMime, MIME_TO_EXTENSION_MAP };

View file

@ -0,0 +1,113 @@
// Swap every external module for a Jest mock before ./STTService is loaded,
// so the MIME helpers it exports can be exercised on their own.
jest.mock('axios');
jest.mock('form-data');
jest.mock('https-proxy-agent');
jest.mock('@librechat/data-schemas', () => {
  return {
    logger: {
      warn: jest.fn(),
      error: jest.fn(),
    },
  };
});
jest.mock('@librechat/api', () => {
  return {
    genAzureEndpoint: jest.fn(),
    logAxiosError: jest.fn(),
  };
});
jest.mock('librechat-data-provider', () => {
  return {
    extractEnvVariable: jest.fn(),
    STTProviders: {},
  };
});
jest.mock('~/server/services/Config', () => {
  return {
    getAppConfig: jest.fn(),
  };
});

// Loaded only after the mocks above are registered.
const {
  getFileExtensionFromMime,
  MIME_TO_EXTENSION_MAP,
} = require('./STTService');
describe('getFileExtensionFromMime', () => {
  // Each row is [test title, MIME type passed in, extension expected back].
  // Rows are registered in order, one `it` per row.
  const lookupCases = [
    ['should normalize audio/x-m4a to m4a', 'audio/x-m4a', 'm4a'],
    ['should normalize audio/mp4 to m4a', 'audio/mp4', 'm4a'],
    ['should normalize audio/x-wav to wav', 'audio/x-wav', 'wav'],
    ['should normalize audio/x-flac to flac', 'audio/x-flac', 'flac'],
    ['should normalize audio/mpeg to mp3', 'audio/mpeg', 'mp3'],
    ['should return webm for audio/webm', 'audio/webm', 'webm'],
    ['should return ogg for audio/ogg', 'audio/ogg', 'ogg'],
    ['should fall back to webm for unknown MIME types', 'audio/somethingelse', 'webm'],
  ];

  for (const [title, mimeType, expectedExtension] of lookupCases) {
    it(title, () => {
      expect(getFileExtensionFromMime(mimeType)).toBe(expectedExtension);
    });
  }

  // Missing input is expected to yield the same 'webm' default as unknown types.
  it('should return webm for null/undefined input', () => {
    for (const missingInput of [null, undefined]) {
      expect(getFileExtensionFromMime(missingInput)).toBe('webm');
    }
  });
});
describe('STT audio format validation with MIME normalization', () => {
  const acceptedFormats = ['flac', 'mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'ogg', 'wav', 'webm'];

  /**
   * Local copy of the accept/reject decision used for format validation in
   * azureOpenAIProvider.
   *
   * A MIME type is accepted when either:
   *  - it is a key of MIME_TO_EXTENSION_MAP and the mapped extension is an
   *    accepted format, or
   *  - its prefix is `audio` or `video` and its raw subtype is itself an
   *    accepted format.
   *
   * Only the map is consulted for normalization, so an unrecognized subtype
   * never gets a default extension, and the prefix gate keeps types such as
   * text/webm from passing on their subtype alone.
   *
   * @param {string} mimetype - Full MIME type, e.g. 'audio/x-wav'.
   * @returns {boolean} True when the MIME type passes validation.
   */
  function isFormatAccepted(mimetype) {
    const [prefix, subtype = ''] = mimetype.split('/');
    const mappedExtension =
      mimetype in MIME_TO_EXTENSION_MAP ? MIME_TO_EXTENSION_MAP[mimetype] : null;

    // A known MIME type whose mapped extension is accepted wins outright.
    if (acceptedFormats.includes(mappedExtension)) {
      return true;
    }

    // Otherwise the raw subtype only counts for audio/video prefixes.
    if (prefix !== 'audio' && prefix !== 'video') {
      return false;
    }
    return acceptedFormats.includes(subtype);
  }

  it('should accept audio/x-m4a (browser MIME for .m4a files)', () => {
    const accepted = isFormatAccepted('audio/x-m4a');
    expect(accepted).toBe(true);
  });

  it('should accept audio/x-wav', () => {
    const accepted = isFormatAccepted('audio/x-wav');
    expect(accepted).toBe(true);
  });

  it('should accept audio/x-flac', () => {
    const accepted = isFormatAccepted('audio/x-flac');
    expect(accepted).toBe(true);
  });

  it('should accept standard formats directly', () => {
    const standardTypes = [
      'audio/mpeg',
      'audio/wav',
      'audio/ogg',
      'audio/webm',
      'audio/flac',
      'audio/mp3',
      'audio/mp4',
      'audio/mpga',
    ];
    for (const mimeType of standardTypes) {
      expect(isFormatAccepted(mimeType)).toBe(true);
    }
  });

  it('should reject unknown audio subtypes', () => {
    const unknownTypes = ['audio/aac', 'audio/somethingelse', 'video/unknown'];
    for (const mimeType of unknownTypes) {
      expect(isFormatAccepted(mimeType)).toBe(false);
    }
  });

  it('should accept application/ogg (valid Ogg container MIME type in the map)', () => {
    const accepted = isFormatAccepted('application/ogg');
    expect(accepted).toBe(true);
  });

  it('should reject non-audio types even if subtype matches an accepted format', () => {
    const nonAudioTypes = ['text/webm', 'text/plain', 'application/json'];
    for (const mimeType of nonAudioTypes) {
      expect(isFormatAccepted(mimeType)).toBe(false);
    }
  });
});