/******************************************************************************** * File: audio.cpp * Author: ppkantorski * Description: * Render-thread-safe audio with a single shared DMA playback buffer. * Key design: * - rawBuf : only per-sound allocation — compact native-channel 16-bit PCM, * native sample rate, no volume. Kept for re-render on vol/dock change. * - m_playBuf : single shared DMA-ready buffer, sized to the largest sound's * 48 kHz stereo output. All sounds share this one allocation. * - renderToPlayBuf() runs inside playSound(): resamples to 48 kHz (linear interp), * expands mono → L+R, and applies volume in one pass. * - No per-sound stereo copy is kept — memory cost per sound is rawBuf only. * - m_playBuf is safe to reuse because audout is always drained before writing. * - Volume and dock state are read live at render time — no stale tracking needed. * - Supports any WAV sample rate ≤ 48 kHz (8000/11025/16000/22050/32000/44100/48000 Hz). * Source rates > 48 kHz are rejected at load time. * * For the latest updates and contributions, visit the project's GitHub repository. * (GitHub Repository: https://github.com/ppkantorski/Ultrahand-Overlay) * * Note: Please be aware that this notice cannot be altered or removed. It is a part * of the project's documentation and must remain intact. 
* * Licensed under both GPLv2 and CC-BY-4.0 * Copyright (c) 2025-2026 ppkantorski ********************************************************************************/ #include "audio.hpp" namespace ult { // ── Static member definitions ───────────────────────────────────────────── bool Audio::m_initialized = false; std::atomic Audio::m_enabled{true}; std::atomic Audio::m_masterVolumeFixed{154}; // 0.6 * 256 ≈ 154 bool Audio::m_lastDockedState = false; std::vector Audio::m_cachedSounds; std::mutex Audio::m_audioMutex; void* Audio::m_playBuf = nullptr; uint32_t Audio::m_playBufCap = 0; AudioOutBuffer Audio::m_audoutBuf = {}; // 4 KB — required by Switch audout DMA static constexpr uint32_t AUDIO_ALIGN = 0x1000; static constexpr uint32_t TARGET_RATE = 48000; // ── initialize ──────────────────────────────────────────────────────────── bool Audio::initialize() { std::lock_guard lock(m_audioMutex); if (m_initialized) return true; if (R_FAILED(audoutInitialize()) || R_FAILED(audoutStartAudioOut())) { audoutExit(); return false; } m_initialized = true; m_cachedSounds.resize(static_cast(SoundType::Count)); m_lastDockedState = ult::consoleIsDocked(); reloadAllSounds(); return true; } // ── exit ────────────────────────────────────────────────────────────────── void Audio::exit() { std::lock_guard lock(m_audioMutex); for (auto& s : m_cachedSounds) { free(s.rawBuf); s = CachedSound{}; } free(m_playBuf); m_playBuf = nullptr; m_playBufCap = 0; m_audoutBuf = {}; if (m_initialized) { audoutStopAudioOut(); audoutExit(); m_initialized = false; } } // ── reloadAllSounds ─────────────────────────────────────────────────────── void Audio::reloadAllSounds() { for (uint32_t i = 0; i < static_cast(SoundType::Count); ++i) loadSoundFromWav(static_cast(i), m_soundPaths[i]); // growPlayBuf() is called inside loadSoundFromWav after each load, // so m_playBuf is always sized to the current high-water mark. 
} // ── unloadAllSounds ─────────────────────────────────────────────────────── void Audio::unloadAllSounds(const std::initializer_list& excludeSounds) { std::lock_guard lock(m_audioMutex); if (!m_initialized) return; for (uint32_t i = 0; i < m_cachedSounds.size(); ++i) { const SoundType cur = static_cast(i); if (std::find(excludeSounds.begin(), excludeSounds.end(), cur) != excludeSounds.end()) continue; CachedSound& s = m_cachedSounds[i]; free(s.rawBuf); s = CachedSound{}; } // m_playBuf stays allocated — it will be reused when any remaining sound plays. } // ── reloadIfDockedChanged ───────────────────────────────────────────────── // Volume and dock state are read live in renderToPlayBuf(), so no stale // marking is needed here — just update m_lastDockedState. bool Audio::reloadIfDockedChanged() { if (!m_initialized) return false; const bool currentDocked = ult::consoleIsDocked(); if (currentDocked == m_lastDockedState) return false; std::lock_guard lock(m_audioMutex); m_lastDockedState = currentDocked; return true; } // ── growPlayBuf ─────────────────────────────────────────────────────────── // Computes the 48 kHz stereo output size needed for every currently loaded // sound and grows m_playBuf to cover the largest one. // Called after each loadSoundFromWav. Must hold m_audioMutex. void Audio::growPlayBuf() { uint32_t maxNeeded = 0; for (const auto& s : m_cachedSounds) { if (!s.rawBuf || s.rawSize == 0) continue; const uint32_t srcPerChan = s.isMono ? (s.rawSize / sizeof(s16)) : (s.rawSize / sizeof(s16)) / 2; const uint32_t outPerChan = (s.sampleRate == TARGET_RATE || s.sampleRate == 0) ? 
srcPerChan : static_cast( ((uint64_t)srcPerChan * TARGET_RATE + s.sampleRate - 1) / s.sampleRate); const uint32_t stereoBytes = outPerChan * 2 * sizeof(s16); const uint32_t needed = (stereoBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1); if (needed > maxNeeded) maxNeeded = needed; } if (maxNeeded <= m_playBufCap) return; // already large enough free(m_playBuf); m_playBuf = aligned_alloc(AUDIO_ALIGN, maxNeeded); if (m_playBuf) { m_playBufCap = maxNeeded; } else { m_playBufCap = 0; } } // ── renderToPlayBuf ─────────────────────────────────────────────────────── // Writes rawBuf → m_playBuf: resample to 48 kHz (linear interp if needed), // expand mono → L+R, apply current volume and dock attenuation. // Returns actual output byte count written, or 0 on error. // Must be called under m_audioMutex. uint32_t Audio::renderToPlayBuf(const CachedSound& s) { if (!s.rawBuf || s.rawSize == 0 || !m_playBuf) return 0; const uint32_t srcSamples = s.rawSize / sizeof(s16); const uint32_t srcPerChan = s.isMono ? srcSamples : srcSamples / 2; const uint32_t outPerChan = (s.sampleRate == TARGET_RATE || s.sampleRate == 0) ? srcPerChan : static_cast( ((uint64_t)srcPerChan * TARGET_RATE + s.sampleRate - 1) / s.sampleRate); const uint32_t stereoBytes = outPerChan * 2 * sizeof(s16); const uint32_t needed = (stereoBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1); if (needed > m_playBufCap) return 0; // shouldn't happen after growPlayBuf() // Effective volume: master * 0.5 when docked (TV speaker protection). // Fixed-point: 0–256 where 256 == 1.0. 
int32_t vol = m_masterVolumeFixed.load(std::memory_order_relaxed); if (m_lastDockedState) vol >>= 1; const s16* src = static_cast(s.rawBuf); s16* dst = static_cast(m_playBuf); const bool needsResample = (s.sampleRate != TARGET_RATE && s.sampleRate != 0); if (!needsResample) { // ── Fast path: native 48 kHz — no resampling ───────────────────── if (s.isMono) { for (uint32_t i = 0; i < srcSamples; ++i) { const s16 v = static_cast((static_cast(src[i]) * vol) >> 8); *dst++ = v; // L *dst++ = v; // R } } else { for (uint32_t i = 0; i < srcSamples; ++i) *dst++ = static_cast((static_cast(src[i]) * vol) >> 8); } } else { // ── Resample path: linear interpolation to 48 kHz ──────────────── // step is source frames per output frame in 16.16 fixed-point. // For all rates ≤ 48 kHz this is < 1.0 (upsampling). const uint64_t step = ((uint64_t)s.sampleRate << 16) / TARGET_RATE; uint64_t srcFixed = 0; if (s.isMono) { for (uint32_t i = 0; i < outPerChan; ++i) { const uint32_t i0 = static_cast(srcFixed >> 16); const uint32_t i1 = (i0 + 1 < srcPerChan) ? i0 + 1 : i0; const int32_t frac = static_cast(srcFixed & 0xFFFF); const int32_t s0 = src[i0], s1 = src[i1]; const s16 v = static_cast( ((s0 + (((s1 - s0) * frac) >> 16)) * vol) >> 8); *dst++ = v; // L *dst++ = v; // R srcFixed += step; } } else { // Stereo interleaved: index [frame*2+0] = L, [frame*2+1] = R for (uint32_t i = 0; i < outPerChan; ++i) { const uint32_t i0 = static_cast(srcFixed >> 16); const uint32_t i1 = (i0 + 1 < srcPerChan) ? 
i0 + 1 : i0; const int32_t frac = static_cast(srcFixed & 0xFFFF); const int32_t l0 = src[i0*2], l1 = src[i1*2]; const int32_t r0 = src[i0*2 + 1], r1 = src[i1*2 + 1]; *dst++ = static_cast(((l0 + (((l1-l0)*frac)>>16)) * vol) >> 8); *dst++ = static_cast(((r0 + (((r1-r0)*frac)>>16)) * vol) >> 8); srcFixed += step; } } } // Zero-fill alignment padding if (stereoBytes < needed) memset(static_cast(m_playBuf) + stereoBytes, 0, needed - stereoBytes); return stereoBytes; } // ── loadSoundFromWav ────────────────────────────────────────────────────── // Reads WAV → rawBuf (16-bit, native channels, no volume applied). // Rejects source rates > 48 kHz. Calls growPlayBuf() after a successful load. // Must be called under m_audioMutex. bool Audio::loadSoundFromWav(SoundType type, const char* path) { const uint32_t idx = static_cast(type); if (!m_initialized || idx >= static_cast(SoundType::Count)) return false; CachedSound& s = m_cachedSounds[idx]; free(s.rawBuf); s = CachedSound{}; // reset all fields FILE* f = fopen(path, "rb"); if (!f) return false; // ── RIFF/WAVE header ────────────────────────────────────────────────── char hdr[12]; if (fread(hdr, 1, 12, f) != 12 || memcmp(hdr, "RIFF", 4) || memcmp(hdr + 8, "WAVE", 4)) { fclose(f); return false; } u16 fmt = 0, ch = 0, bits = 0; u32 rate = 0, dSize = 0; long dPos = 0; // ── Chunk scan ──────────────────────────────────────────────────────── while (fread(hdr, 1, 8, f) == 8) { const u32 sz = *reinterpret_cast(hdr + 4); if (!memcmp(hdr, "fmt ", 4)) { fread(&fmt, 2, 1, f); fread(&ch, 2, 1, f); fread(&rate, 4, 1, f); fseek(f, 6, SEEK_CUR); // skip byte rate + block align fread(&bits, 2, 1, f); fseek(f, (long)sz - 16, SEEK_CUR); } else if (!memcmp(hdr, "data", 4)) { dSize = sz; dPos = ftell(f); break; } else { fseek(f, sz, SEEK_CUR); } } // ── Validate ────────────────────────────────────────────────────────── // Reject rates above 48 kHz — downsampling would expand rawBuf beyond its // target-rate output and defeat the purpose of 
small source files. if (!dSize || fmt != 1 || ch == 0 || ch > 2 || (bits != 8 && bits != 16) || rate == 0 || rate > TARGET_RATE) { fclose(f); return false; } const uint32_t inSamples = dSize / (bits / 8); const uint32_t rawBytes = inSamples * sizeof(s16); // normalised to 16-bit const uint32_t rawCap = (rawBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1); void* buf = aligned_alloc(AUDIO_ALIGN, rawCap); if (!buf) { fclose(f); return false; } fseek(f, dPos, SEEK_SET); s16* out = static_cast(buf); uint32_t remaining = inSamples; uint32_t outIdx = 0; // ── Chunked read + bit-depth normalisation ──────────────────────────── constexpr uint32_t CHUNK = 512; if (bits == 8) { u8 chunk[CHUNK]; while (remaining > 0) { const uint32_t toRead = std::min(remaining, CHUNK); if (fread(chunk, 1, toRead, f) != toRead) { free(buf); fclose(f); return false; } for (uint32_t i = 0; i < toRead; ++i) out[outIdx++] = static_cast((static_cast(chunk[i]) - 128) << 8); remaining -= toRead; } } else { s16 chunk[CHUNK]; while (remaining > 0) { const uint32_t toRead = std::min(remaining, CHUNK); if (fread(chunk, sizeof(s16), toRead, f) != toRead) { free(buf); fclose(f); return false; } memcpy(out + outIdx, chunk, toRead * sizeof(s16)); outIdx += toRead; remaining -= toRead; } } fclose(f); if (rawBytes < rawCap) memset(static_cast(buf) + rawBytes, 0, rawCap - rawBytes); s.rawBuf = buf; s.rawSize = rawBytes; s.rawCap = rawCap; s.sampleRate = rate; s.isMono = (ch == 1); // Grow shared play buffer if this sound's 48 kHz output would exceed it. growPlayBuf(); return (m_playBuf != nullptr); } // ── playSound ───────────────────────────────────────────────────────────── // Drains the audout queue, renders the sound into the shared play buffer, // then submits. Volume and dock attenuation are applied live inside render. 
void Audio::playSound(SoundType type) { if (!m_enabled.load(std::memory_order_relaxed)) return; const uint32_t idx = static_cast(type); if (idx >= static_cast(SoundType::Count)) return; std::lock_guard lock(m_audioMutex); if (!m_initialized || !m_playBuf) return; const CachedSound& s = m_cachedSounds[idx]; if (!s.rawBuf) return; // sound file not loaded // Drain finished buffers so audout's queue stays healthy and so we know // the shared buffer is no longer in use by a previous submission. AudioOutBuffer* released = nullptr; u32 releasedCount = 0; audoutGetReleasedAudioOutBuffer(&released, &releasedCount); const uint32_t outBytes = renderToPlayBuf(s); if (outBytes == 0) return; const uint32_t bufCap = (outBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1); m_audoutBuf = {}; m_audoutBuf.buffer = m_playBuf; m_audoutBuf.buffer_size = bufCap; m_audoutBuf.data_size = outBytes; m_audoutBuf.data_offset = 0; m_audoutBuf.next = nullptr; AudioOutBuffer* rel = nullptr; audoutPlayBuffer(&m_audoutBuf, &rel); } // ── Volume / enable accessors ───────────────────────────────────────────── // Volume is read live in renderToPlayBuf() — no stale marking needed. void Audio::setMasterVolume(float v) { const int32_t fixed = static_cast(std::clamp(v, 0.0f, 1.0f) * 256.0f); m_masterVolumeFixed.store(fixed, std::memory_order_relaxed); } void Audio::setEnabled(bool e) { m_enabled.store(e, std::memory_order_relaxed); } bool Audio::isEnabled() { return m_enabled.load(std::memory_order_relaxed); } } // namespace ult