Files
Horizon-OC/Source/hoc-clk/overlay/lib/libultrahand/libultra/source/audio.cpp
2026-04-01 16:00:54 -04:00

410 lines
18 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/********************************************************************************
* File: audio.cpp
* Author: ppkantorski
* Description:
* Render-thread-safe audio with a single shared DMA playback buffer.
* Key design:
* - rawBuf : only per-sound allocation — compact native-channel 16-bit PCM,
* native sample rate, no volume. Kept for re-render on vol/dock change.
* - m_playBuf : single shared DMA-ready buffer, sized to the largest sound's
* 48 kHz stereo output. All sounds share this one allocation.
* - renderToPlayBuf() runs inside playSound(): resamples to 48 kHz (linear interp),
* expands mono → L+R, and applies volume in one pass.
* - No per-sound stereo copy is kept — memory cost per sound is rawBuf only.
* - m_playBuf is safe to reuse because audout is always drained before writing.
* - Volume and dock state are read live at render time — no stale tracking needed.
* - Supports any WAV sample rate ≤ 48 kHz (8/11025/16000/22050/32000/44100/48000 Hz).
* Source rates > 48 kHz are rejected at load time.
*
* For the latest updates and contributions, visit the project's GitHub repository.
* (GitHub Repository: https://github.com/ppkantorski/Ultrahand-Overlay)
*
* Note: Please be aware that this notice cannot be altered or removed. It is a part
* of the project's documentation and must remain intact.
*
* Licensed under both GPLv2 and CC-BY-4.0
* Copyright (c) 2025-2026 ppkantorski
********************************************************************************/
#include "audio.hpp"
namespace ult {
// ── Static member definitions ─────────────────────────────────────────────
bool Audio::m_initialized = false;
std::atomic<bool> Audio::m_enabled{true};
std::atomic<int32_t> Audio::m_masterVolumeFixed{154}; // 0.6 * 256 ≈ 154
bool Audio::m_lastDockedState = false;
std::vector<Audio::CachedSound> Audio::m_cachedSounds;
std::mutex Audio::m_audioMutex;
void* Audio::m_playBuf = nullptr;
uint32_t Audio::m_playBufCap = 0;
AudioOutBuffer Audio::m_audoutBuf = {};
// 4 KB — required by Switch audout DMA
static constexpr uint32_t AUDIO_ALIGN = 0x1000;
static constexpr uint32_t TARGET_RATE = 48000;
// ── initialize ────────────────────────────────────────────────────────────
bool Audio::initialize() {
std::lock_guard<std::mutex> lock(m_audioMutex);
if (m_initialized) return true;
if (R_FAILED(audoutInitialize()) || R_FAILED(audoutStartAudioOut())) {
audoutExit();
return false;
}
m_initialized = true;
m_cachedSounds.resize(static_cast<uint32_t>(SoundType::Count));
m_lastDockedState = ult::consoleIsDocked();
reloadAllSounds();
return true;
}
// ── exit ──────────────────────────────────────────────────────────────────
void Audio::exit() {
std::lock_guard<std::mutex> lock(m_audioMutex);
for (auto& s : m_cachedSounds) {
free(s.rawBuf);
s = CachedSound{};
}
free(m_playBuf);
m_playBuf = nullptr;
m_playBufCap = 0;
m_audoutBuf = {};
if (m_initialized) {
audoutStopAudioOut();
audoutExit();
m_initialized = false;
}
}
// ── reloadAllSounds ───────────────────────────────────────────────────────
void Audio::reloadAllSounds() {
for (uint32_t i = 0; i < static_cast<uint32_t>(SoundType::Count); ++i)
loadSoundFromWav(static_cast<SoundType>(i), m_soundPaths[i]);
// growPlayBuf() is called inside loadSoundFromWav after each load,
// so m_playBuf is always sized to the current high-water mark.
}
// ── unloadAllSounds ───────────────────────────────────────────────────────
void Audio::unloadAllSounds(const std::initializer_list<SoundType>& excludeSounds) {
std::lock_guard<std::mutex> lock(m_audioMutex);
if (!m_initialized) return;
for (uint32_t i = 0; i < m_cachedSounds.size(); ++i) {
const SoundType cur = static_cast<SoundType>(i);
if (std::find(excludeSounds.begin(), excludeSounds.end(), cur) != excludeSounds.end())
continue;
CachedSound& s = m_cachedSounds[i];
free(s.rawBuf);
s = CachedSound{};
}
// m_playBuf stays allocated — it will be reused when any remaining sound plays.
}
// ── reloadIfDockedChanged ─────────────────────────────────────────────────
// Volume and dock state are read live in renderToPlayBuf(), so no stale
// marking is needed here — just update m_lastDockedState.
bool Audio::reloadIfDockedChanged() {
if (!m_initialized) return false;
const bool currentDocked = ult::consoleIsDocked();
if (currentDocked == m_lastDockedState) return false;
std::lock_guard<std::mutex> lock(m_audioMutex);
m_lastDockedState = currentDocked;
return true;
}
// ── growPlayBuf ───────────────────────────────────────────────────────────
// Computes the 48 kHz stereo output size needed for every currently loaded
// sound and grows m_playBuf to cover the largest one.
// Called after each loadSoundFromWav. Must hold m_audioMutex.
void Audio::growPlayBuf() {
uint32_t maxNeeded = 0;
for (const auto& s : m_cachedSounds) {
if (!s.rawBuf || s.rawSize == 0) continue;
const uint32_t srcPerChan = s.isMono
? (s.rawSize / sizeof(s16))
: (s.rawSize / sizeof(s16)) / 2;
const uint32_t outPerChan = (s.sampleRate == TARGET_RATE || s.sampleRate == 0)
? srcPerChan
: static_cast<uint32_t>(
((uint64_t)srcPerChan * TARGET_RATE + s.sampleRate - 1) / s.sampleRate);
const uint32_t stereoBytes = outPerChan * 2 * sizeof(s16);
const uint32_t needed = (stereoBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1);
if (needed > maxNeeded) maxNeeded = needed;
}
if (maxNeeded <= m_playBufCap) return; // already large enough
free(m_playBuf);
m_playBuf = aligned_alloc(AUDIO_ALIGN, maxNeeded);
if (m_playBuf) {
m_playBufCap = maxNeeded;
} else {
m_playBufCap = 0;
}
}
// ── renderToPlayBuf ───────────────────────────────────────────────────────
// Writes rawBuf → m_playBuf: resample to 48 kHz (linear interp if needed),
// expand mono → L+R, apply current volume and dock attenuation.
// Returns actual output byte count written, or 0 on error.
// Must be called under m_audioMutex.
uint32_t Audio::renderToPlayBuf(const CachedSound& s) {
if (!s.rawBuf || s.rawSize == 0 || !m_playBuf) return 0;
const uint32_t srcSamples = s.rawSize / sizeof(s16);
const uint32_t srcPerChan = s.isMono ? srcSamples : srcSamples / 2;
const uint32_t outPerChan = (s.sampleRate == TARGET_RATE || s.sampleRate == 0)
? srcPerChan
: static_cast<uint32_t>(
((uint64_t)srcPerChan * TARGET_RATE + s.sampleRate - 1) / s.sampleRate);
const uint32_t stereoBytes = outPerChan * 2 * sizeof(s16);
const uint32_t needed = (stereoBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1);
if (needed > m_playBufCap) return 0; // shouldn't happen after growPlayBuf()
// Effective volume: master * 0.5 when docked (TV speaker protection).
// Fixed-point: 0256 where 256 == 1.0.
int32_t vol = m_masterVolumeFixed.load(std::memory_order_relaxed);
if (m_lastDockedState) vol >>= 1;
const s16* src = static_cast<const s16*>(s.rawBuf);
s16* dst = static_cast<s16*>(m_playBuf);
const bool needsResample = (s.sampleRate != TARGET_RATE && s.sampleRate != 0);
if (!needsResample) {
// ── Fast path: native 48 kHz — no resampling ─────────────────────
if (s.isMono) {
for (uint32_t i = 0; i < srcSamples; ++i) {
const s16 v = static_cast<s16>((static_cast<int32_t>(src[i]) * vol) >> 8);
*dst++ = v; // L
*dst++ = v; // R
}
} else {
for (uint32_t i = 0; i < srcSamples; ++i)
*dst++ = static_cast<s16>((static_cast<int32_t>(src[i]) * vol) >> 8);
}
} else {
// ── Resample path: linear interpolation to 48 kHz ────────────────
// step is source frames per output frame in 16.16 fixed-point.
// For all rates ≤ 48 kHz this is < 1.0 (upsampling).
const uint64_t step = ((uint64_t)s.sampleRate << 16) / TARGET_RATE;
uint64_t srcFixed = 0;
if (s.isMono) {
for (uint32_t i = 0; i < outPerChan; ++i) {
const uint32_t i0 = static_cast<uint32_t>(srcFixed >> 16);
const uint32_t i1 = (i0 + 1 < srcPerChan) ? i0 + 1 : i0;
const int32_t frac = static_cast<int32_t>(srcFixed & 0xFFFF);
const int32_t s0 = src[i0], s1 = src[i1];
const s16 v = static_cast<s16>(
((s0 + (((s1 - s0) * frac) >> 16)) * vol) >> 8);
*dst++ = v; // L
*dst++ = v; // R
srcFixed += step;
}
} else {
// Stereo interleaved: index [frame*2+0] = L, [frame*2+1] = R
for (uint32_t i = 0; i < outPerChan; ++i) {
const uint32_t i0 = static_cast<uint32_t>(srcFixed >> 16);
const uint32_t i1 = (i0 + 1 < srcPerChan) ? i0 + 1 : i0;
const int32_t frac = static_cast<int32_t>(srcFixed & 0xFFFF);
const int32_t l0 = src[i0*2], l1 = src[i1*2];
const int32_t r0 = src[i0*2 + 1], r1 = src[i1*2 + 1];
*dst++ = static_cast<s16>(((l0 + (((l1-l0)*frac)>>16)) * vol) >> 8);
*dst++ = static_cast<s16>(((r0 + (((r1-r0)*frac)>>16)) * vol) >> 8);
srcFixed += step;
}
}
}
// Zero-fill alignment padding
if (stereoBytes < needed)
memset(static_cast<u8*>(m_playBuf) + stereoBytes, 0, needed - stereoBytes);
return stereoBytes;
}
// ── loadSoundFromWav ──────────────────────────────────────────────────────
// Reads WAV → rawBuf (16-bit, native channels, no volume applied).
// Rejects source rates > 48 kHz. Calls growPlayBuf() after a successful load.
// Must be called under m_audioMutex.
bool Audio::loadSoundFromWav(SoundType type, const char* path) {
const uint32_t idx = static_cast<uint32_t>(type);
if (!m_initialized || idx >= static_cast<uint32_t>(SoundType::Count)) return false;
CachedSound& s = m_cachedSounds[idx];
free(s.rawBuf);
s = CachedSound{}; // reset all fields
FILE* f = fopen(path, "rb");
if (!f) return false;
// ── RIFF/WAVE header ──────────────────────────────────────────────────
char hdr[12];
if (fread(hdr, 1, 12, f) != 12 ||
memcmp(hdr, "RIFF", 4) ||
memcmp(hdr + 8, "WAVE", 4)) {
fclose(f); return false;
}
u16 fmt = 0, ch = 0, bits = 0;
u32 rate = 0, dSize = 0;
long dPos = 0;
// ── Chunk scan ────────────────────────────────────────────────────────
while (fread(hdr, 1, 8, f) == 8) {
const u32 sz = *reinterpret_cast<const u32*>(hdr + 4);
if (!memcmp(hdr, "fmt ", 4)) {
fread(&fmt, 2, 1, f);
fread(&ch, 2, 1, f);
fread(&rate, 4, 1, f);
fseek(f, 6, SEEK_CUR); // skip byte rate + block align
fread(&bits, 2, 1, f);
fseek(f, (long)sz - 16, SEEK_CUR);
} else if (!memcmp(hdr, "data", 4)) {
dSize = sz;
dPos = ftell(f);
break;
} else {
fseek(f, sz, SEEK_CUR);
}
}
// ── Validate ──────────────────────────────────────────────────────────
// Reject rates above 48 kHz — downsampling would expand rawBuf beyond its
// target-rate output and defeat the purpose of small source files.
if (!dSize || fmt != 1 || ch == 0 || ch > 2 ||
(bits != 8 && bits != 16) || rate == 0 || rate > TARGET_RATE) {
fclose(f); return false;
}
const uint32_t inSamples = dSize / (bits / 8);
const uint32_t rawBytes = inSamples * sizeof(s16); // normalised to 16-bit
const uint32_t rawCap = (rawBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1);
void* buf = aligned_alloc(AUDIO_ALIGN, rawCap);
if (!buf) { fclose(f); return false; }
fseek(f, dPos, SEEK_SET);
s16* out = static_cast<s16*>(buf);
uint32_t remaining = inSamples;
uint32_t outIdx = 0;
// ── Chunked read + bit-depth normalisation ────────────────────────────
constexpr uint32_t CHUNK = 512;
if (bits == 8) {
u8 chunk[CHUNK];
while (remaining > 0) {
const uint32_t toRead = std::min(remaining, CHUNK);
if (fread(chunk, 1, toRead, f) != toRead) {
free(buf); fclose(f); return false;
}
for (uint32_t i = 0; i < toRead; ++i)
out[outIdx++] = static_cast<s16>((static_cast<int32_t>(chunk[i]) - 128) << 8);
remaining -= toRead;
}
} else {
s16 chunk[CHUNK];
while (remaining > 0) {
const uint32_t toRead = std::min(remaining, CHUNK);
if (fread(chunk, sizeof(s16), toRead, f) != toRead) {
free(buf); fclose(f); return false;
}
memcpy(out + outIdx, chunk, toRead * sizeof(s16));
outIdx += toRead;
remaining -= toRead;
}
}
fclose(f);
if (rawBytes < rawCap)
memset(static_cast<u8*>(buf) + rawBytes, 0, rawCap - rawBytes);
s.rawBuf = buf;
s.rawSize = rawBytes;
s.rawCap = rawCap;
s.sampleRate = rate;
s.isMono = (ch == 1);
// Grow shared play buffer if this sound's 48 kHz output would exceed it.
growPlayBuf();
return (m_playBuf != nullptr);
}
// ── playSound ─────────────────────────────────────────────────────────────
// Drains the audout queue, renders the sound into the shared play buffer,
// then submits. Volume and dock attenuation are applied live inside render.
void Audio::playSound(SoundType type) {
if (!m_enabled.load(std::memory_order_relaxed)) return;
const uint32_t idx = static_cast<uint32_t>(type);
if (idx >= static_cast<uint32_t>(SoundType::Count)) return;
std::lock_guard<std::mutex> lock(m_audioMutex);
if (!m_initialized || !m_playBuf) return;
const CachedSound& s = m_cachedSounds[idx];
if (!s.rawBuf) return; // sound file not loaded
// Drain finished buffers so audout's queue stays healthy and so we know
// the shared buffer is no longer in use by a previous submission.
AudioOutBuffer* released = nullptr;
u32 releasedCount = 0;
audoutGetReleasedAudioOutBuffer(&released, &releasedCount);
const uint32_t outBytes = renderToPlayBuf(s);
if (outBytes == 0) return;
const uint32_t bufCap = (outBytes + AUDIO_ALIGN - 1) & ~(AUDIO_ALIGN - 1);
m_audoutBuf = {};
m_audoutBuf.buffer = m_playBuf;
m_audoutBuf.buffer_size = bufCap;
m_audoutBuf.data_size = outBytes;
m_audoutBuf.data_offset = 0;
m_audoutBuf.next = nullptr;
AudioOutBuffer* rel = nullptr;
audoutPlayBuffer(&m_audoutBuf, &rel);
}
// ── Volume / enable accessors ─────────────────────────────────────────────
// Volume is read live in renderToPlayBuf() — no stale marking needed.
void Audio::setMasterVolume(float v) {
const int32_t fixed = static_cast<int32_t>(std::clamp(v, 0.0f, 1.0f) * 256.0f);
m_masterVolumeFixed.store(fixed, std::memory_order_relaxed);
}
void Audio::setEnabled(bool e) {
m_enabled.store(e, std::memory_order_relaxed);
}
bool Audio::isEnabled() {
return m_enabled.load(std::memory_order_relaxed);
}
} // namespace ult