diff --git a/Source/sys-clk-OC/sysmodule/Makefile b/Source/sys-clk-OC/sysmodule/Makefile index 6a29934a..2b3b8853 100644 --- a/Source/sys-clk-OC/sysmodule/Makefile +++ b/Source/sys-clk-OC/sysmodule/Makefile @@ -39,7 +39,7 @@ DEFINES := -DDISABLE_IPC -DTARGET="\"$(TARGET)\"" -DTARGET_VERSION="\"$(TARGET_V ARCH := -march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE -CFLAGS := -g -Wall -O2 -ffunction-sections \ +CFLAGS := -g -Wall -O3 -ffunction-sections \ $(ARCH) $(DEFINES) CFLAGS += $(INCLUDE) -D__SWITCH__ diff --git a/Source/sys-clk-OC/sysmodule/lib/nxExt/include/nxExt/apm_ext.h b/Source/sys-clk-OC/sysmodule/lib/nxExt/include/nxExt/apm_ext.h index b8fd2284..98a1b520 100644 --- a/Source/sys-clk-OC/sysmodule/lib/nxExt/include/nxExt/apm_ext.h +++ b/Source/sys-clk-OC/sysmodule/lib/nxExt/include/nxExt/apm_ext.h @@ -23,7 +23,13 @@ void apmExtExit(void); Result apmExtGetPerformanceMode(u32 *out_mode); Result apmExtSysRequestPerformanceMode(u32 mode); Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf); -bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited); + +inline bool apmExtIsCPUBoosted(u32 conf_id) { // CPU boosted to 1785 MHz + return (conf_id == 0x92220009 || conf_id == 0x9222000A); +}; +inline bool apmExtIsBoostMode(u32 conf_id) { // GPU throttled to 76.8 MHz + return (conf_id >= 0x92220009 && conf_id <= 0x9222000C); +}; #ifdef __cplusplus } diff --git a/Source/sys-clk-OC/sysmodule/lib/nxExt/src/apm_ext.c b/Source/sys-clk-OC/sysmodule/lib/nxExt/src/apm_ext.c index 736e015f..ba30d4ff 100644 --- a/Source/sys-clk-OC/sysmodule/lib/nxExt/src/apm_ext.c +++ b/Source/sys-clk-OC/sysmodule/lib/nxExt/src/apm_ext.c @@ -64,9 +64,3 @@ Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf) { return serviceDispatchOut(&g_apmSysSrv, 7, *out_conf); } - -bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited) { - if (allow_cpu_limited) - return (conf_id >= 0x92220009 && conf_id <= 0x922200C); - return (conf_id == 0x92220009 || conf_id == 0x922200A); -} \ No newline at end of file diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp index fd02201e..6bb87234 100644 --- a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp +++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp @@ -77,15 +77,15 @@ ClockManager::~ClockManager() delete this->config; } -bool ClockManager::IsCpuBoostMode() +bool ClockManager::IsBoostMode() { std::uint32_t confId = this->context->perfConfId; - bool isCpuBoostMode = apmExtIsBoostMode(confId, false); - if (isCpuBoostMode && !this->oc->boostCPUFreq) { + bool isBoostMode = apmExtIsBoostMode(confId); + if (apmExtIsCPUBoosted(confId) && !this->oc->boostCPUFreq) { this->oc->boostCPUFreq = std::max(this->context->freqs[SysClkModule_CPU], 1785'000'000U); this->governor->SetCPUBoostHz(this->oc->boostCPUFreq); } - return isCpuBoostMode; + return isBoostMode; } void ClockManager::SetRunning(bool running) @@ -186,7 +186,7 @@ void ClockManager::Tick() if (hz && hz != this->context->freqs[module]) { // Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz - bool skipBoost = IsCpuBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU); + bool skipBoost = IsBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU); if (!skipBoost) { FileUtils::LogLine("[mgr] %s clock set : %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10); Clocks::SetHz((SysClkModule)module, hz); @@ -312,7 +312,7 @@ bool ClockManager::RefreshContext() } // let ptm module handle boost clocks rather than resetting - if (hasChanged && !IsCpuBoostMode()) { + if (hasChanged && !IsBoostMode()) { Clocks::ResetToStock(); } diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.h b/Source/sys-clk-OC/sysmodule/src/clock_manager.h index 3dd8ff71..509e36d6 100644 --- a/Source/sys-clk-OC/sysmodule/src/clock_manager.h +++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.h @@ -52,7 +52,7 @@ class ClockManager ReverseNXSync *rnxSync; Governor *governor; - bool IsCpuBoostMode(); + bool IsBoostMode(); uint32_t GetHz(SysClkModule); }; diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp index 383ccbfd..6d3b08fa 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp @@ -92,7 +92,7 @@ void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) { void Governor::Start() { m_stop_threads = false; - svcSleepThread(8 * TICK_TIME_MAIN_NS); + svcSleepThread(8 * TICK_TIME_NS); Result rc = 0; for (int core = 0; core < CORE_NUMS; core++) { @@ -124,7 +124,7 @@ void Governor::Start() { void Governor::Stop() { m_stop_threads = true; - svcSleepThread(8 * TICK_TIME_MAIN_NS); + svcSleepThread(8 * TICK_TIME_NS); threadWaitForExit(&m_t_main); threadClose(&m_t_main); @@ -144,7 +144,7 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) { m_cpu_freq.idx_max_hz = FindIndex(&m_cpu_freq, max_hz); break; case SysClkModule_GPU: - m_gpu_freq.idx_boost_hz = m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz); + m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz); break; case SysClkModule_MEM: m_mem_freq = max_hz; @@ -237,19 +237,19 @@ void Governor::CheckCpuUtilWorker(void* args) { } void Governor::CheckCpuUtilWorkerAppCore(int64_t coreid) { - constexpr uint64_t STUCK_TICKS = 2; + constexpr uint64_t STUCK_TICKS = 5; s_Queue q; while (!m_stop_threads) { bool isBusy = m_core3_stuck_cnt > STUCK_TICKS * (CORE_NUMS - 1); if (isBusy) { m_core3_stuck_cnt = 0; SetBoostHz(&m_cpu_freq); - svcSleepThread(STUCK_TICKS * TICK_TIME_CPU_NS); + svcSleepThread(STUCK_TICKS * TICK_TIME_NS); } else { m_core3_stuck_cnt++; } - uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get(); + uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get(); q.PopAndPush(load); m_cpu_core_ctx[coreid].util = q.GetAvg(); } @@ -259,7 +259,7 @@ void Governor::CheckCpuUtilWorkerSysCore() { s_Queue q; int64_t coreid = CORE_NUMS - 1; while (!m_stop_threads) { - uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get(); + uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get(); q.PopAndPush(load); m_cpu_core_ctx[coreid].util = q.GetAvg() * 7 / 8; // Adjusted, Multipler: 0.875 } @@ -284,7 +284,7 @@ void Governor::Main(void* args) { } q; auto GetGpuUtil = [nvgpu_field, q]() mutable { - uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_GPU_MS).Get(); + uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_NS).Get(); if (load > 20) { // Ignore load <= 2.0% q.queue[q.pos % QUEUE_SIZE] = load; q.pos++; @@ -301,29 +301,30 @@ void Governor::Main(void* args) { return load; }; - uint64_t update_ticks = SAMPLE_RATE_MAIN; + constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2; + uint64_t update_ticks = UPDATE_CONTEXT_RATE; bool CPUBoosted = false; - bool GPUBoosted = false; // Limited to 76.8 MHz, literally + bool GPUThrottled = false; while (!self->m_stop_threads) { self->m_core3_stuck_cnt = 0; - bool shouldUpdateContext = update_ticks++ >= SAMPLE_RATE_MAIN; + bool shouldUpdateContext = update_ticks++ >= UPDATE_CONTEXT_RATE; if (shouldUpdateContext) { update_ticks = 0; uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU); // Sleep mode detected, wait 1 tick while (!hz) { self->m_core3_stuck_cnt = 0; - svcSleepThread(TICK_TIME_MAIN_NS); + svcSleepThread(TICK_TIME_NS); hz = Clocks::GetCurrentHz(SysClkModule_GPU); } - GPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, true); - CPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, false); + GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id); + CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id); self->m_gpu_freq.idx_target_hz = FindIndex(&self->m_gpu_freq, hz); - if (GPUBoosted) + if (GPUThrottled) SetBoostHz(&self->m_gpu_freq); hz = Clocks::GetCurrentHz(SysClkModule_CPU); @@ -337,7 +338,7 @@ void Governor::Main(void* args) { if (hz != self->m_mem_freq) Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq); } else { - if (!GPUBoosted) { + if (!GPUThrottled) { uint32_t gpu_util = GetGpuUtil(); if (gpu_util > GPU_THR_RAMP_MAX) { if (TargetRamp(&self->m_gpu_freq, RAMP_MAX)) @@ -362,7 +363,7 @@ void Governor::Main(void* args) { } } - svcSleepThread(TICK_TIME_MAIN_NS); + svcSleepThread(TICK_TIME_NS); } } diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.h b/Source/sys-clk-OC/sysmodule/src/oc_extra.h index 576e2c08..554d6447 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.h +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.h @@ -13,8 +13,8 @@ class CpuCoreUtil { public: - CpuCoreUtil (int coreid = -2, uint64_t ms = 1): - m_core_id(coreid), m_wait_time_ms(ms), m_wait_time_ns(ms * 1000'000ULL) {}; + CpuCoreUtil (int coreid = -2, uint64_t ns = 1000'000ULL): + m_core_id(coreid), m_wait_time_ns(ns) {}; inline uint64_t Get() { Start(); WaitForStop(); Stop(); return Calculate(); }; inline void Start() { m_idletick = GetIdleTickCount(); }; @@ -22,11 +22,11 @@ public: inline void Stop() { m_idletick = GetIdleTickCount() - m_idletick; }; static constexpr uint64_t TICKS_PER_MS = 192; - inline uint64_t Calculate() { return 100'0 - m_idletick * 10 / (TICKS_PER_MS * m_wait_time_ms); }; + inline uint64_t Calculate() { return 100'0 - m_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * m_wait_time_ns); }; protected: const int m_core_id; - const uint64_t m_wait_time_ms, m_wait_time_ns; + const uint64_t m_wait_time_ns; uint64_t m_idletick; inline uint64_t GetIdleTickCount() { @@ -38,8 +38,8 @@ protected: class GpuCoreUtil { public: - GpuCoreUtil (uint32_t nvgpu_field, uint64_t ms = 1): - m_nvgpu_field(nvgpu_field), m_wait_time_ns(ms * 1000'000ULL) {}; + GpuCoreUtil (uint32_t nvgpu_field, uint64_t ns = 1000'000ULL): + m_nvgpu_field(nvgpu_field), m_wait_time_ns(ns) {}; inline uint64_t Get() { Wait(); return GetLoad(); }; inline void Wait() { svcSleepThread(m_wait_time_ns); }; @@ -127,19 +127,14 @@ public: protected: // Parameters for sampling - static constexpr uint64_t SAMPLE_RATE_MAIN = 60, SAMPLE_RATE_GPU = 60; - static constexpr uint64_t SAMPLE_RATE_CPU = SAMPLE_RATE_GPU / 2; - static constexpr uint64_t UPDATE_CONTEXT_RATE = 60; - static constexpr uint64_t TICK_TIME_CPU_MS = 1000 / SAMPLE_RATE_CPU; - static constexpr uint64_t TICK_TIME_CPU_NS = 1E9 / SAMPLE_RATE_CPU; - static constexpr uint64_t TICK_TIME_GPU_MS = 1000 / SAMPLE_RATE_GPU; - static constexpr uint64_t TICK_TIME_MAIN_MS = 1000 / SAMPLE_RATE_MAIN; - static constexpr uint64_t TICK_TIME_MAIN_NS = 1E9 / SAMPLE_RATE_MAIN; + static constexpr uint64_t SAMPLE_RATE = 200; + static constexpr uint64_t TICK_TIME_MS = 1000 / SAMPLE_RATE; + static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE; // Parameters for frequency ramp threshold static constexpr uint64_t CPU_THR_RAMP_DOWN = 70'0; static constexpr uint64_t CPU_THR_RAMP_UP = 90'0; - static constexpr uint64_t GPU_THR_RAMP_DOWN = 60'0; + static constexpr uint64_t GPU_THR_RAMP_DOWN = 70'0; static constexpr uint64_t GPU_THR_RAMP_UP = 80'0; static constexpr uint64_t GPU_THR_RAMP_MAX = 90'0; @@ -196,7 +191,7 @@ private: // Much faster than from stl T queue[QUEUE_SIZE] = { 0 }; T sum = 0; - T pos = 0; + size_t pos = 0; T GetAvg() { return sum / QUEUE_SIZE; }; T GetFirst() { return queue[pos % QUEUE_SIZE]; };