From b0ace1d8ae9be9dafd63edc51f897c928f5af5d4 Mon Sep 17 00:00:00 2001 From: KazushiM <85604869+KazushiMe@users.noreply.github.com> Date: Thu, 9 Feb 2023 22:21:11 +0800 Subject: [PATCH] sys-clk-OC: Separate governor into multiple modules that could be disabled individually --- Source/sys-clk-OC/README.md | 8 +- .../sys-clk-OC/common/include/sysclk/clocks.h | 18 +- Source/sys-clk-OC/common/src/clock_table.c | 11 +- .../overlay/src/ui/gui/app_profile_gui.cpp | 23 +- .../sysmodule/src/clock_manager.cpp | 35 +- Source/sys-clk-OC/sysmodule/src/config.cpp | 43 +- Source/sys-clk-OC/sysmodule/src/config.h | 6 +- Source/sys-clk-OC/sysmodule/src/oc_extra.cpp | 341 +++++++------- Source/sys-clk-OC/sysmodule/src/oc_extra.h | 445 +++++++++++------- 9 files changed, 529 insertions(+), 401 deletions(-) diff --git a/Source/sys-clk-OC/README.md b/Source/sys-clk-OC/README.md index 32f492aa..5d33d4cc 100644 --- a/Source/sys-clk-OC/README.md +++ b/Source/sys-clk-OC/README.md @@ -135,7 +135,7 @@ Copy the `atmosphere`, and `switch` folders at the root of your sdcard, overwrit * sys-clk overlay (accessible from anywhere by invoking the [Tesla menu](https://gbatemp.net/threads/tesla-the-nintendo-switch-overlay-menu.557362/)) `/switch/.overlays/sys-clk-overlay.ovl` - + * sys-clk core sysmodule `/atmosphere/contents/00FF0000636C6BFF/exefs.nsp` @@ -143,7 +143,7 @@ Copy the `atmosphere`, and `switch` folders at the root of your sdcard, overwrit ## Config -Presets can be customized by adding them to the ini config file located at `/config/sys-clk/config.ini`, using the following template for each app +Presets can be customized by adding them to the ini config file located at `/config/sys-clk/config.ini`, using the following template for each app ``` [Application Title ID] @@ -162,14 +162,14 @@ handheld_charging_official_mem= handheld_cpu= handheld_gpu= handheld_mem= -governor_disabled= +governor_config= ``` * Replace `Application Title ID` with the title id of the game/application you're 
interested in customizing. A list of games title id can be found in the [Switchbrew wiki](https://switchbrew.org/wiki/Title_list/Games). * Frequencies are expressed in mhz, and will be scaled to the nearest possible values, described in the clock table below. * If any key is omitted, value is empty or set to 0, it will be ignored, and stock clocks will apply. -* If charging, sys-clk will look for the frequencies in that order, picking the first found +* If charging, sys-clk will look for the frequencies in that order, picking the first found 1. Charger specific config (USB or Official) `handheld_charging_usb_X` or `handheld_charging_official_X` 2. Non specific charging config `handheld_charging_X` 3. Handheld config `handheld_X` diff --git a/Source/sys-clk-OC/common/include/sysclk/clocks.h b/Source/sys-clk-OC/common/include/sysclk/clocks.h index 80b28200..8f3d6fe5 100644 --- a/Source/sys-clk-OC/common/include/sysclk/clocks.h +++ b/Source/sys-clk-OC/common/include/sysclk/clocks.h @@ -63,7 +63,6 @@ typedef struct { bool systemCoreBoostCPU; bool batteryChargingDisabledOverride; - bool governor; SysClkProfile realProfile; } SysClkOcExtra; @@ -72,7 +71,20 @@ typedef struct uint32_t values[20]; } SysClkFrequencyTable; -uint32_t* GetTable(SysClkModule module); +uint32_t* GetModuleFreqTable(SysClkModule module); +uint32_t GetModuleMaximumFreq(SysClkModule module); + +typedef enum { + SysClkOcGovernorConfig_AllDisabled = 0, + SysClkOcGovernorConfig_CPU_Shift = 0, + SysClkOcGovernorConfig_CPUOnly = 1, + SysClkOcGovernorConfig_CPU = 1 << SysClkOcGovernorConfig_CPU_Shift, + SysClkOcGovernorConfig_GPU_Shift = 1 << SysClkOcGovernorConfig_CPU_Shift, + SysClkOcGovernorConfig_GPUOnly = 1 << SysClkOcGovernorConfig_GPU_Shift, + SysClkOcGovernorConfig_GPU = 1 << SysClkOcGovernorConfig_GPU_Shift, + SysClkOcGovernorConfig_Default = 3, + SysClkOcGovernorConfig_Mask = 3, +} SysClkOcGovernorConfig; typedef struct { @@ -80,7 +92,7 @@ typedef struct uint32_t mhz[(size_t)SysClkProfile_EnumMax 
* (size_t)SysClkModule_EnumMax]; uint32_t mhzMap[SysClkProfile_EnumMax][SysClkModule_EnumMax]; }; - bool governorDisabled; + SysClkOcGovernorConfig governorConfig; } SysClkTitleProfileList; #define SYSCLK_GLOBAL_PROFILE_TID 0xA111111111111111 diff --git a/Source/sys-clk-OC/common/src/clock_table.c b/Source/sys-clk-OC/common/src/clock_table.c index 3b798c9a..ffab2946 100644 --- a/Source/sys-clk-OC/common/src/clock_table.c +++ b/Source/sys-clk-OC/common/src/clock_table.c @@ -76,7 +76,7 @@ uint32_t g_freq_table_gpu_hz[] = { 0, }; -uint32_t* GetTable(SysClkModule module) { +uint32_t* GetModuleFreqTable(SysClkModule module) { switch (module) { case SysClkModule_CPU: return &g_freq_table_cpu_hz[0]; @@ -88,3 +88,12 @@ uint32_t* GetTable(SysClkModule module) { return NULL; } } + +uint32_t GetModuleMaximumFreq(SysClkModule module) { + uint32_t* p = GetModuleFreqTable(module); + if (p == NULL || *p == 0) + return UINT32_MAX; + + while (*(++p)); + return *(--p); +} diff --git a/Source/sys-clk-OC/overlay/src/ui/gui/app_profile_gui.cpp b/Source/sys-clk-OC/overlay/src/ui/gui/app_profile_gui.cpp index 6ef822e0..36811401 100644 --- a/Source/sys-clk-OC/overlay/src/ui/gui/app_profile_gui.cpp +++ b/Source/sys-clk-OC/overlay/src/ui/gui/app_profile_gui.cpp @@ -73,14 +73,29 @@ void AppProfileGui::listUI() bool globalGovernorEnabled = configList->values[SysClkConfigValue_GovernorExperimental]; if (globalGovernorEnabled) { - tsl::elm::ToggleListItem* governorToggle = new tsl::elm::ToggleListItem("Disable governor", this->profileList->governorDisabled); - governorToggle->setStateChangedListener([this](bool state) { - this->profileList->governorDisabled = state; + tsl::elm::ToggleListItem* cpuGovernorToggle = new tsl::elm::ToggleListItem("CPU Freq Governor", + (this->profileList->governorConfig >> SysClkOcGovernorConfig_CPU_Shift) & 1); + cpuGovernorToggle->setStateChangedListener([this](bool state) { + this->profileList->governorConfig = + 
SysClkOcGovernorConfig((this->profileList->governorConfig & SysClkOcGovernorConfig_GPUOnly) | state << SysClkOcGovernorConfig_CPU_Shift); + Result rc = sysclkIpcSetProfiles(this->applicationId, this->profileList); if (R_FAILED(rc)) FatalGui::openWithResultCode("sysclkIpcSetProfiles", rc); }); - this->listElement->addItem(governorToggle); + this->listElement->addItem(cpuGovernorToggle); + + tsl::elm::ToggleListItem* gpuGovernorToggle = new tsl::elm::ToggleListItem("GPU Freq Governor", + (this->profileList->governorConfig >> SysClkOcGovernorConfig_GPU_Shift) & 1); + gpuGovernorToggle->setStateChangedListener([this](bool state) { + this->profileList->governorConfig = + SysClkOcGovernorConfig((this->profileList->governorConfig & SysClkOcGovernorConfig_CPUOnly) | state << SysClkOcGovernorConfig_GPU_Shift); + + Result rc = sysclkIpcSetProfiles(this->applicationId, this->profileList); + if (R_FAILED(rc)) + FatalGui::openWithResultCode("sysclkIpcSetProfiles", rc); + }); + this->listElement->addItem(gpuGovernorToggle); } delete configList; diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp index 2f5b15b3..a28e5fe1 100644 --- a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp +++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp @@ -59,7 +59,6 @@ ClockManager::ClockManager() this->oc = new SysClkOcExtra; this->oc->systemCoreBoostCPU = false; this->oc->batteryChargingDisabledOverride = false; - this->oc->governor = false; this->oc->realProfile = SysClkProfile_Handheld; this->rnxSync = new ReverseNXSync; @@ -150,12 +149,12 @@ void ClockManager::Tick() uint32_t hz = GetHz((SysClkModule)module); this->governor->SetMaxHz(hz, (SysClkModule)module); - bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM); - if (hz && hz != this->context->freqs[module] && !handledByGovernor) + if (hz && hz != this->context->freqs[module] && !this->governor->IsHandledByGovernor((SysClkModule)module)) { // Skip 
setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz - bool skipBoost = apmExtIsBoostMode(this->context->perfConfId); - skipBoost &= (module == SysClkModule_CPU && hz <= Clocks::boostCpuFreq) || module == SysClkModule_GPU; + bool skipBoost = apmExtIsBoostMode(this->context->perfConfId) && + ((module == SysClkModule_CPU && hz <= Clocks::boostCpuFreq) || module == SysClkModule_GPU); + if (!skipBoost) { FileUtils::LogLine("[mgr] %s clock set : %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10); Clocks::SetHz((SysClkModule)module, hz); @@ -174,7 +173,7 @@ void ClockManager::WaitForNextTick() /* Self-check system core (#3) usage via idleticks at intervals (Not enabled at higher CPU freq or without charger) */ uint64_t tickWaitTimeMs = this->GetConfig()->GetConfigValue(SysClkConfigValue_PollingIntervalMs); - if (this->oc->governor) { + if (this->governor->IsHandledByGovernor(SysClkModule_CPU)) { svcSleepThread(tickWaitTimeMs * 1000'000ULL); return; } @@ -240,21 +239,14 @@ bool ClockManager::RefreshContext() this->rnxSync->Reset(applicationId); } - bool governor = this->GetConfig()->GetConfigValue(SysClkConfigValue_GovernorExperimental); - governor &= !this->GetConfig()->GetTitleGovernorDisabled(applicationId); - if (governor != this->oc->governor) - { - this->oc->governor = governor; - FileUtils::LogLine("[mgr] Governor status: %s", governor ? 
"enabled" : "disabled"); - hasChanged = true; - } - - if (hasChanged) { - if (enabled && governor) - this->governor->Start(); - else - this->governor->Stop(); + SysClkOcGovernorConfig governorConfig = SysClkOcGovernorConfig_AllDisabled; + if (this->GetConfig()->GetConfigValue(SysClkConfigValue_GovernorExperimental)) { + governorConfig = SysClkOcGovernorConfig_Default; + SysClkOcGovernorConfig governorConfigTitle = this->GetConfig()->GetTitleGovernorConfig(applicationId); + if (governorConfig != governorConfigTitle) + governorConfig = governorConfigTitle; } + this->governor->SetConfig(governorConfig); SysClkProfile realProfile = Clocks::GetCurrentProfile(); if (realProfile != this->oc->realProfile) @@ -301,8 +293,7 @@ bool ClockManager::RefreshContext() if (hz != 0 && hz != this->context->freqs[module]) { this->context->freqs[module] = hz; - bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM); - if (!handledByGovernor) { + if (!this->governor->IsHandledByGovernor((SysClkModule)module)) { FileUtils::LogLine("[mgr] %s clock change: %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10); hasChanged = true; } diff --git a/Source/sys-clk-OC/sysmodule/src/config.cpp b/Source/sys-clk-OC/sysmodule/src/config.cpp index c5e9a0e6..2eb5f9e4 100644 --- a/Source/sys-clk-OC/sysmodule/src/config.cpp +++ b/Source/sys-clk-OC/sysmodule/src/config.cpp @@ -25,7 +25,7 @@ Config::Config(std::string path) this->loaded = false; this->profileMhzMap = std::map, std::uint32_t>(); this->profileCountMap = std::map(); - this->profileGovernorDisabled = std::map(); + this->profileGovernorMap = std::map(); this->mtime = 0; this->enabled = false; for(unsigned int i = 0; i < SysClkModule_EnumMax; i++) @@ -79,7 +79,7 @@ void Config::Close() this->loaded = false; this->profileMhzMap.clear(); this->profileCountMap.clear(); - this->profileGovernorDisabled.clear(); + this->profileGovernorMap.clear(); for(unsigned int i = 0; i < 
SysClkConfigValue_EnumMax; i++) { @@ -171,18 +171,18 @@ std::uint32_t Config::GetAutoClockHz(std::uint64_t tid, SysClkModule module, Sys return 0; } -bool Config::GetTitleGovernorDisabled(std::uint64_t tid) +SysClkOcGovernorConfig Config::GetTitleGovernorConfig(std::uint64_t tid) { if (this->loaded) { - std::map::const_iterator it = this->profileGovernorDisabled.find(tid); - if (it != this->profileGovernorDisabled.end()) + std::map::const_iterator it = this->profileGovernorMap.find(tid); + if (it != this->profileGovernorMap.end()) { return it->second; } } - return false; + return SysClkOcGovernorConfig_Default; } void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles) @@ -197,11 +197,12 @@ void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles } } - std::map::const_iterator it = this->profileGovernorDisabled.find(tid); - bool governorDisabled = false; - if (it != this->profileGovernorDisabled.end() && it->second) - governorDisabled = true; - out_profiles->governorDisabled = governorDisabled; + std::map::const_iterator it = this->profileGovernorMap.find(tid); + SysClkOcGovernorConfig governor = SysClkOcGovernorConfig_Default; + // Found + if (it != this->profileGovernorMap.end()) + governor = it->second; + out_profiles->governorConfig = governor; } bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate) @@ -254,9 +255,9 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo } } - if (profiles->governorDisabled) { - snprintf(sk, 0x40, "%s", CONFIG_KEY_TITLE_GOVERNOR_DISABLED); - snprintf(sv, 0x10, "%d", profiles->governorDisabled); + if (profiles->governorConfig != SysClkOcGovernorConfig_Default) { + snprintf(sk, 0x40, "%s", CONFIG_KEY_TITLE_GOVERNOR_CONFIG); + snprintf(sv, 0x10, "%d", profiles->governorConfig); *ik++ = sk; *iv++ = sv; } @@ -290,10 +291,10 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo } } - if 
(profiles->governorDisabled) - this->profileGovernorDisabled[tid] = profiles->governorDisabled; + if (profiles->governorConfig == SysClkOcGovernorConfig_Default) + this->profileGovernorMap.erase(tid); else - this->profileGovernorDisabled.erase(tid); + this->profileGovernorMap[tid] = profiles->governorConfig; } return true; @@ -343,13 +344,13 @@ int Config::BrowseIniFunc(const char* section, const char* key, const char* valu return 1; } - if (!strcmp(key, CONFIG_KEY_TITLE_GOVERNOR_DISABLED)) { + if (!strcmp(key, CONFIG_KEY_TITLE_GOVERNOR_CONFIG)) { input = strtoul(value, NULL, 0); - if ((input & 0x1) != input) { - input = 0; + if ((input & SysClkOcGovernorConfig_Mask) != input) { + input = SysClkOcGovernorConfig_Default; FileUtils::LogLine("[cfg] Invalid value for key '%s' in section '%s': using default %d", key, section, input); } - config->profileGovernorDisabled[tid] = (bool)input; + config->profileGovernorMap[tid] = (SysClkOcGovernorConfig)input; return 1; } diff --git a/Source/sys-clk-OC/sysmodule/src/config.h b/Source/sys-clk-OC/sysmodule/src/config.h index 99d7aaf4..acc7a150 100644 --- a/Source/sys-clk-OC/sysmodule/src/config.h +++ b/Source/sys-clk-OC/sysmodule/src/config.h @@ -21,7 +21,7 @@ #define CONFIG_VAL_SECTION "values" -#define CONFIG_KEY_TITLE_GOVERNOR_DISABLED "governor_disabled" +#define CONFIG_KEY_TITLE_GOVERNOR_CONFIG "governor_config" class Config { @@ -39,7 +39,7 @@ class Config void GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles); bool SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate); std::uint32_t GetAutoClockHz(std::uint64_t tid, SysClkModule module, SysClkProfile profile); - bool GetTitleGovernorDisabled(std::uint64_t tid); + SysClkOcGovernorConfig GetTitleGovernorConfig(std::uint64_t tid); void SetEnabled(bool enabled); bool Enabled(); @@ -61,7 +61,7 @@ class Config std::map, std::uint32_t> profileMhzMap; std::map profileCountMap; - std::map profileGovernorDisabled; + std::map 
profileGovernorMap; bool loaded; std::string path; time_t mtime; diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp index f433369e..1c917db0 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp @@ -90,6 +90,7 @@ ReverseNXMode ReverseNXSync::RecheckToolMode() { if (this->m_tool_enabled) { const char* fileName = "_ZN2nn2oe16GetOperationModeEv.asm64"; // or _ZN2nn2oe18GetPerformanceModeEv.asm64 const char* filePath = new char[72]; + SCOPE_EXIT { delete[] filePath; }; /* Check per-game patch */ snprintf((char*)filePath, 72, "/SaltySD/patches/%016lX/%s", this->m_app_id, fileName); mode = this->GetToolModeFromPatch(filePath); @@ -98,7 +99,6 @@ ReverseNXMode ReverseNXSync::RecheckToolMode() { snprintf((char*)filePath, 72, "/SaltySD/patches/%s", fileName); mode = this->GetToolModeFromPatch(filePath); } - delete[] filePath; } return mode; @@ -135,157 +135,94 @@ void PsmExt::ChargingHandler(ClockManager* instance) { delete info; } - -Governor::Governor() { - memset(reinterpret_cast(&m_cpu_freq), 0, sizeof(m_cpu_freq)); - memset(reinterpret_cast(&m_gpu_freq), 0, sizeof(m_gpu_freq)); - - m_cpu_freq.module = SysClkModule_CPU; - m_gpu_freq.module = SysClkModule_GPU; - - m_cpu_freq.hz_list = GetTable(SysClkModule_CPU); - m_gpu_freq.hz_list = GetTable(SysClkModule_GPU); - - m_cpu_freq.boost_hz = Clocks::boostCpuFreq; - m_cpu_freq.utilref_hz = 2397'000'000; - - m_gpu_freq.boost_hz = 76'800'000; - m_gpu_freq.min_hz = 153'600'000; - m_gpu_freq.utilref_hz = 1305'600'000; - - nvInitialize(); - Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu"); - if (R_FAILED(rc)) { - ASSERT_RESULT_OK(rc, "nvOpen"); - nvExit(); - } -} - -Governor::~Governor() { - Stop(); - nvClose(m_nvgpu_field); - nvExit(); -} - -void Governor::Start() { - if (m_running) - return; - - m_running = true; - Result rc = 0; - for (int core = 0; core < CORE_NUMS; core++) { - s_CoreContext* s = 
&m_cpu_core_ctx[core]; - s->self = this; - s->id = core; - int prio = (core == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT - rc = threadCreate(&m_t_cpuworker[core], &CpuUtilWorker, (void*)s, NULL, 0x400, prio, core); - ASSERT_RESULT_OK(rc, "threadCreate"); - rc = threadStart(&m_t_cpuworker[core]); - ASSERT_RESULT_OK(rc, "threadStart"); - } - rc = threadCreate(&m_t_main, &Main, (void*)this, NULL, 0x400, 0x3F, 3); - ASSERT_RESULT_OK(rc, "threadCreate"); - rc = threadStart(&m_t_main); - ASSERT_RESULT_OK(rc, "threadStart"); -} - -void Governor::Stop() { - if (!m_running) - return; - - m_running = false; - svcSleepThread(TICK_TIME_NS); - - threadWaitForExit(&m_t_main); - threadClose(&m_t_main); - - for (int core = 0; core < CORE_NUMS; core++) { - threadWaitForExit(&m_t_cpuworker[core]); - threadClose(&m_t_cpuworker[core]); - } -} - -void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) { - if (!max_hz) // Fallback to apm configuration - max_hz = Clocks::GetStockClock(m_apm_conf, (SysClkModule)module); - - switch (module) { - case SysClkModule_CPU: - m_cpu_freq.max_hz = max_hz; - break; - case SysClkModule_GPU: - m_gpu_freq.max_hz = max_hz; - m_gpu_freq.min_hz = (m_gpu_freq.max_hz <= 153'600'000) ? 
max_hz : 153'600'000; - break; - default: - break; - } -} - -void Governor::SetPerfConf(uint32_t id) { - m_perf_conf_id = id; - m_apm_conf = Clocks::GetEmbeddedApmConfig(id); -} - -uint32_t Governor::s_FreqContext::GetNormalizedUtil(uint32_t raw_util) { - return ((uint64_t)raw_util * target_hz / utilref_hz); -} +namespace GovernorImpl { // Schedutil: https://github.com/torvalds/linux/blob/master/kernel/sched/cpufreq_schedutil.c // C = 1.25, tipping-point 80.0% (used in Linux schedutil), 1.25 -> 1 + (1 >> 2) // C = 1.5, tipping-point 66.7%, 1.5 -> 1 + (1 >> 1) // Utilization is frequency-invariant (normalized): -// next_freq = C * max_freq(ref_freq) * util / max -void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) { - uint32_t prev_hz = target_hz; - - auto FindHzInTable = [](uint32_t* hz_list, uint32_t in_hz) { - uint32_t* p = hz_list; +// target_freq = C * max_freq(ref_freq) * util / max +void BaseGovernor::ApplyNewFreqFromNormUtil(uint32_t normUtil) { + auto FindHzInTable = [](uint32_t* list, uint32_t hz) -> uint32_t { + uint32_t* p = list; for (; *p != 0; p++) { - if (in_hz <= *p) - return p; + if (hz <= *p) + return *p; } - return (--p); + return *(--p); }; - uint32_t next_freq = utilref_hz / UTIL_MAX * norm_util; + uint32_t next_freq = m_ref_hz / UTIL_MAX * normUtil; next_freq += next_freq >> 1; + uint32_t new_hz; if (next_freq >= max_hz) - target_hz = max_hz; + new_hz = max_hz; else if (next_freq <= min_hz) - target_hz = min_hz; + new_hz = min_hz; else - target_hz = *FindHzInTable(hz_list, next_freq); + new_hz = FindHzInTable(m_hz_list, next_freq); - bool changed = target_hz != prev_hz; - if (changed) - SetHz(); + ApplyTargetFreq(new_hz); } -void Governor::s_FreqContext::SetHz() { - if (target_hz) - Clocks::SetHz(module, target_hz); +void CpuGovernor::GovernorWorker::Start() { + if (this->running) + return; + + this->running = true; + Result rc = 0; + for (int id = 0; id < CORE_NUMS; id++) { + WorkerContext* s = &contexts[id]; + s->super = 
this->super; + s->id = id; + int prio = (id == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT + rc = threadCreate(&threads[id], &WorkerContext::Loop, (void*)s, NULL, 0x400, prio, id); + ASSERT_RESULT_OK(rc, "threadCreate"); + rc = threadStart(&threads[id]); + ASSERT_RESULT_OK(rc, "threadStart"); + } } -void Governor::s_FreqContext::Boost() { - target_hz = boost_hz; - if (module == SysClkModule_CPU && max_hz > boost_hz) - target_hz = max_hz; - SetHz(); +void CpuGovernor::GovernorWorker::Stop() { + if (!this->running) + return; + + this->running = false; + svcSleepThread(TICK_TIME_NS); + + for (auto &t : threads) { + threadWaitForExit(&t); + threadClose(&t); + } } -void Governor::CpuUtilWorker(void* args) { - s_CoreContext* s = static_cast(args); +void CpuGovernor::Apply() { + uint32_t util = 0; + for (auto& ctx : this->m_worker.contexts) { + uint32_t core_util = ctx.util; + if (util < core_util) + util = core_util; + } + + this->m_util.Update(util); + if (this->auto_boost && this->m_worker.contexts[SYS_CORE_ID].util > BOOST_THRESHOLD) + this->ApplyBoost(); + else + this->ApplyNewFreqFromNormUtil(this->m_util.Get()); +} + +void CpuGovernor::WorkerContext::Loop(void* args) { + WorkerContext* s = static_cast(args); + CpuGovernor* self = s->super; + GovernorWorker* worker = &(self->m_worker); int coreid = s->id; - Governor* self = s->self; - while (self->m_running) { + while (worker->running) { uint64_t tick = s->tick = armGetSystemTick(); - s->util = self->m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get()); + s->util = self->CalcNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get()); - bool CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id); - if (CPUBoosted) { + if (apmExtIsCPUBoosted(self->m_manager->GetPerfConf())) { svcSleepThread(TICK_TIME_NS); continue; } @@ -295,88 +232,128 @@ void Governor::CpuUtilWorker(void* args) { if (id == coreid) continue; - uint64_t diff = std::abs((int64_t)self->m_cpu_core_ctx[id].tick - (int64_t)tick); + 
uint64_t diff = std::abs((int64_t)worker->contexts[id].tick - (int64_t)tick); if (diff < SYSTICK_HZ / SAMPLE_RATE * 10) continue; - if (id == SYS_CORE_ID && self->m_syscore_autoboost) { - self->m_cpu_freq.Boost(); + // Stuck on system core and auto boost enabled, apply boost + if (id == SYS_CORE_ID && self->auto_boost) { + self->ApplyBoost(); break; } - self->m_cpu_freq.target_hz = self->m_cpu_freq.max_hz; - self->m_cpu_freq.SetHz(); + // Stuck on other cores or auto boost disabled, apply max hz + self->ApplyTargetFreq(self->max_hz); break; } } } -void Governor::Main(void* args) { +void GpuGovernor::Apply() { + uint32_t util = this->CalcNormalizedUtil(GpuCoreUtil(m_nvgpu_field).Get()); + this->m_util.Update(util); + this->ApplyNewFreqFromNormUtil(this->m_util.Get()); +} + +} + +bool Governor::IsHandledByGovernor(SysClkModule module) { + switch (module) { + case SysClkModule_CPU: + return ((this->GetConfig() >> SysClkOcGovernorConfig_CPU_Shift) & 1); + case SysClkModule_GPU: + return ((this->GetConfig() >> SysClkOcGovernorConfig_GPU_Shift) & 1); + case SysClkModule_MEM: + return false; + default: + return this->GetConfig() != SysClkOcGovernorConfig_AllDisabled; + } +} + +void Governor::SetConfig(SysClkOcGovernorConfig config) { + if (m_config == config) + return; + + m_config = config; + m_cpu_gov->m_worker.onConfigUpdated(config); + m_manager.onConfigUpdated(config); +}; + +void Governor::SetPerfConf(uint32_t id) { + m_perf_conf_id = id; + m_apm_conf = Clocks::GetEmbeddedApmConfig(id); +} + +void Governor::SetMaxHz(uint32_t maxHz, SysClkModule module) { + if (!maxHz) // Fallback to apm configuration + maxHz = Clocks::GetStockClock(m_apm_conf, (SysClkModule)module); + + switch (module) { + case SysClkModule_CPU: + m_cpu_gov->max_hz = maxHz; + break; + case SysClkModule_GPU: + m_gpu_gov->max_hz = maxHz; + m_gpu_gov->min_hz = (maxHz <= 153'600'000) ? 
maxHz : 153'600'000; + break; + default: + break; + } +} + +void Governor::GovernorManager::Start() { + if (this->running) + return; + + this->running = true; + Result rc = threadCreate(&thread, &ContextManager, (void*)this, NULL, 0x400, 0x3F, 3); + ASSERT_RESULT_OK(rc, "threadCreate"); + rc = threadStart(&thread); + ASSERT_RESULT_OK(rc, "threadStart"); +} + +void Governor::GovernorManager::Stop() { + if (!this->running) + return; + + this->running = false; + svcSleepThread(TICK_TIME_NS); + threadWaitForExit(&thread); + threadClose(&thread); +} + +void Governor::GovernorManager::ContextManager(void* args) { Governor* self = static_cast(args); - s_FreqContext* cpu_ctx = &self->m_cpu_freq; - s_FreqContext* gpu_ctx = &self->m_gpu_freq; - uint32_t nvgpu_field = self->m_nvgpu_field; - - s_CpuUtil *cpu_util = new s_CpuUtil; - s_GpuUtil *gpu_util = new s_GpuUtil; - auto SetCpuFreq = [self, cpu_ctx, cpu_util]() mutable { - uint32_t util = self->m_cpu_core_ctx[0].util; - for (size_t i = 1; i < CORE_NUMS; i++) { - if (util < self->m_cpu_core_ctx[i].util) - util = self->m_cpu_core_ctx[i].util; - } - cpu_util->Update(util); - if (self->m_cpu_core_ctx[SYS_CORE_ID].util > BOOST_THRESHOLD && self->m_syscore_autoboost) - cpu_ctx->Boost(); - else - cpu_ctx->SetNextFreq(cpu_util->Get()); - }; - - auto SetGpuFreq = [gpu_ctx, nvgpu_field, gpu_util]() mutable { - uint32_t util = gpu_ctx->GetNormalizedUtil(GpuCoreUtil(nvgpu_field).Get()); - gpu_util->Update(util); - util = gpu_util->Get(); - gpu_ctx->SetNextFreq(util); - }; constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2; uint64_t update_ticks = UPDATE_CONTEXT_RATE; - bool CPUBoosted = false; - bool GPUThrottled = false; + bool cpuBoosted = false, gpuThrottled = false; - while (self->m_running) { + while (self->m_manager.running) { bool shouldUpdateContext = ++update_ticks >= UPDATE_CONTEXT_RATE; if (shouldUpdateContext) { update_ticks = 0; - uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU); + + uint32_t hz = 
self->m_gpu_gov->RefreshContext(); // Sleep mode detected, wait 10 ticks while (!hz) { svcSleepThread(10 * TICK_TIME_NS); - hz = Clocks::GetCurrentHz(SysClkModule_GPU); + hz = self->m_gpu_gov->RefreshContext(); } - GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id); - CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id); + uint32_t perf_conf = self->GetPerfConf(); + if ((gpuThrottled = apmExtIsBoostMode(perf_conf)) && (self->GetConfig() & SysClkOcGovernorConfig_GPU)) + self->m_gpu_gov->ApplyBoost(); - gpu_ctx->target_hz = hz; - if (GPUThrottled) - gpu_ctx->Boost(); - - hz = Clocks::GetCurrentHz(SysClkModule_CPU); - cpu_ctx->target_hz = hz; - if (CPUBoosted) - cpu_ctx->Boost(); + if ((cpuBoosted = apmExtIsCPUBoosted(perf_conf)) && (self->GetConfig() & SysClkOcGovernorConfig_CPU)) + self->m_cpu_gov->ApplyBoost(); } - if (!GPUThrottled) - SetGpuFreq(); - if (!CPUBoosted) - SetCpuFreq(); + if (!gpuThrottled && (self->GetConfig() & SysClkOcGovernorConfig_GPU)) + self->m_gpu_gov->Apply(); + if (!cpuBoosted && (self->GetConfig() & SysClkOcGovernorConfig_CPU)) + self->m_cpu_gov->Apply(); svcSleepThread(TICK_TIME_NS); } - - delete cpu_util; - delete gpu_util; -} - +}; diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.h b/Source/sys-clk-OC/sysmodule/src/oc_extra.h index a9f4310c..8cec552b 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.h +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.h @@ -12,8 +12,10 @@ // Forward declaration class ClockManager; +class Governor; #include "clock_manager.h" + class CpuCoreUtil { public: CpuCoreUtil (int coreid, uint64_t ns); @@ -28,6 +30,7 @@ protected: uint64_t GetIdleTickCount(); }; + class GpuCoreUtil { public: GpuCoreUtil (uint32_t nvgpu_field); @@ -38,6 +41,7 @@ protected: static constexpr uint64_t NVGPU_GPU_IOCTL_PMU_GET_GPU_LOAD = 0x80044715; }; + class ReverseNXSync { public: ReverseNXSync (); @@ -61,182 +65,301 @@ protected: ReverseNXMode RecheckToolMode(); }; + namespace PsmExt { void ChargingHandler(ClockManager* 
instance); } -class Governor { -public: - Governor(); - ~Governor(); - void Start(); - void Stop(); - void SetMaxHz(uint32_t max_hz, SysClkModule module); - void SetAutoCPUBoost(bool enabled) { m_syscore_autoboost = enabled; }; - void SetCPUBoostHz(uint32_t boost_hz) { m_cpu_freq.boost_hz = boost_hz; }; - void SetPerfConf(uint32_t id); +constexpr uint64_t SAMPLE_RATE = 200; +constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE; +constexpr uint64_t SYSTICK_HZ = 19200000; -protected: - // Parameters for sampling - static constexpr uint64_t SAMPLE_RATE = 200; - static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE; - static constexpr uint64_t SYSTICK_HZ = 19200000; +namespace GovernorImpl { + constexpr uint32_t UTIL_MAX = 1000; - static constexpr int CORE_NUMS = 4; - static constexpr int SYS_CORE_ID = (CORE_NUMS - 1); - - bool m_running = false; - bool m_syscore_autoboost = false; - Thread m_t_cpuworker[CORE_NUMS], m_t_main; - - uint32_t m_nvgpu_field; - uint32_t m_perf_conf_id; - SysClkApmConfiguration *m_apm_conf; - - typedef struct { - SysClkModule module; - uint32_t* hz_list; - uint32_t target_hz; - uint32_t min_hz; - uint32_t max_hz; - uint32_t boost_hz; - uint32_t utilref_hz; - - uint32_t GetNormalizedUtil(uint32_t raw_util); - void SetNextFreq(uint32_t norm_util); - void SetHz(); - void Boost(); - } s_FreqContext; - s_FreqContext m_cpu_freq, m_gpu_freq; - - typedef struct { - Governor* self; - int id; - uint32_t util; - uint64_t tick; - } s_CoreContext; - s_CoreContext m_cpu_core_ctx[CORE_NUMS]; - - // PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c - // Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... 
+ Util_n * D^n - // To approximate D (decay multiplier): - // After 50 ms (if SAMPLE_RATE == 200, 10 samples) - // UTIL_MAX * D^10 ≈ 1 (UTIL_MAX decayed to 1) - // D = 4129 / 8192 - // Util_acc_max = Util_acc_inf = 2012 - static constexpr uint32_t UTIL_MAX = 100'0; - struct s_CpuUtil { - uint32_t util_acc = 0; - - static constexpr uint32_t DECAY_DIVIDENT = 4129; - static constexpr uint32_t DECAY_DIVISOR = 8192; - static constexpr uint32_t UTIL_ACC_MAX = 2012; - - uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); }; - void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; }; - }; - - static void CpuUtilWorker(void* args); - static void Main(void* args); - - // Get max value from a sliding window in O(1) - template - class SWindowMax { - protected: - typedef struct { - T item; - T max; - } s_Entry; - - struct s_Stack { - s_Entry m_stack[WINDOW_SIZE] = {}; - size_t m_next = WINDOW_SIZE; - - bool empty() { return m_next == 0; }; - s_Entry top() { return m_stack[m_next-1]; }; - s_Entry pop() { return m_stack[--m_next]; }; - void push(s_Entry item) { - if (m_next == WINDOW_SIZE) - return; - m_stack[m_next++] = item; - }; + class BaseGovernor { /* Per-module state and frequency helpers shared by CpuGovernor and GpuGovernor */ + public: + BaseGovernor(SysClkModule module) : m_module(module) { + m_hz_list = GetModuleFreqTable(module); + m_ref_hz = GetModuleMaximumFreq(module); }; - s_Stack enqStack; - s_Stack deqStack; + uint32_t RefreshContext() { return this->m_target_hz = Clocks::GetCurrentHz(this->m_module); }; - void Push(s_Stack& stack, T item) { - s_Entry n = { - .item = item, - .max = enqStack.empty() ? 
item : std::max(item, enqStack.top().max) - }; - stack.push(n); - } - - T Pop() { - if (deqStack.empty()) { - while (!enqStack.empty()) - Push(deqStack, enqStack.pop().max); - } - return deqStack.pop().item; - } - - public: - SWindowMax() {} - - void Add(T item) { Pop(); Push(enqStack, item); } - - T Get() { - if (!enqStack.empty()) { - T enqMax = enqStack.top().max; - if (!deqStack.empty()) { - T deqMax = deqStack.top().max; - return std::max(deqMax, enqMax); - } - return enqMax; - } - if (!deqStack.empty()) - return deqStack.top().max; - return 0; - } - }; - - // Get average value from a sliding window in O(1) - template - class SWindowAvg { - public: - SWindowAvg() {} - - void Add(T item) { - T pop = m_queue[m_next]; - m_queue[m_next] = item; - m_next = (m_next + 1) % WINDOW_SIZE; - m_sum -= pop; - m_sum += item; - } - - T Get() { return m_sum / WINDOW_SIZE; } + uint32_t min_hz = 0, max_hz = 0, boost_hz = 0; /* 0 = not configured yet; ApplyTargetFreq() ignores a 0 Hz request */ protected: - size_t m_next = 0; - T m_sum = 0; - T m_queue[WINDOW_SIZE] = {}; + uint32_t CalcNormalizedUtil(uint32_t rawUtil) { /* scales rawUtil by m_target_hz / m_ref_hz using 64-bit intermediate math */ + return m_ref_hz ? ((uint64_t)rawUtil * m_target_hz / m_ref_hz) : 0; /* guard: never divide by a zero reference frequency */ + }; + + void ApplyNewFreqFromNormUtil(uint32_t norm); + + void ApplyTargetFreq(uint32_t hz) { /* no-op when hz is 0 or already the current target */ + if (!hz || m_target_hz == hz) + return; + + m_target_hz = hz; + Clocks::SetHz(m_module, hz); + }; + + void ApplyBoost() { /* the CPU module uses max_hz when it exceeds boost_hz; otherwise boost_hz is applied */ + ApplyTargetFreq( + (m_module == SysClkModule_CPU && max_hz > boost_hz) ? 
max_hz : boost_hz + ); + }; + + SysClkModule m_module; + uint32_t* m_hz_list; + uint32_t m_target_hz = 0, m_ref_hz = 0; /* m_target_hz is compared in ApplyTargetFreq(), so it must not start indeterminate */ + + friend Governor; }; - struct s_GpuUtil { - SWindowMax window {}; + class CpuGovernor : public BaseGovernor { + public: + CpuGovernor(Governor* manager) + : BaseGovernor(SysClkModule_CPU), m_manager(manager) { + boost_hz = Clocks::boostCpuFreq; + m_worker.super = this; + }; - uint32_t util_acc = 0; - // After 160 ms (if SAMPLE_RATE == 200, 32 samples) - // UTIL_MAX * D^32 ≈ 1 (UTIL_MAX decayed to 1) - // D = 6880 / 8192 - // Util_acc_max = Util_acc_inf = 6145 - static constexpr uint32_t DECAY_DIVIDENT = 6880; - static constexpr uint32_t DECAY_DIVISOR = 8192; - static constexpr uint32_t UTIL_ACC_MAX = 6145; + ~CpuGovernor() { this->m_worker.Stop(); }; - uint32_t Get() { return ((util_acc * UTIL_MAX / UTIL_ACC_MAX) + window.Get()) / 2; }; - void Update(uint32_t util) { window.Add(util); util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; }; + void Apply(); + + bool auto_boost = false; /* written through Governor::SetAutoCPUBoost(); defaults to off until then */ + + protected: + static constexpr int CORE_NUMS = 4; + static constexpr int SYS_CORE_ID = CORE_NUMS - 1; + + // PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c + // Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... 
+ Util_n * D^n + // To approximate D (decay multiplier): + // After 50 ms (if SAMPLE_RATE == 200, 10 samples) + // UTIL_MAX * D^10 ≈ 1 (UTIL_MAX decayed to 1) + // D = 4129 / 8192 + // Util_acc_max = Util_acc_inf = 2012 + typedef struct PeltUtil { + uint32_t util_acc = 0; + + static constexpr uint32_t DECAY_DIVIDENT = 4129; + static constexpr uint32_t DECAY_DIVISOR = 8192; + static constexpr uint32_t UTIL_ACC_MAX = 2012; + + uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); }; /* accumulator rescaled to the 0..UTIL_MAX range */ + void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; }; /* decay the history, then add the new sample */ + } PeltUtil; + PeltUtil m_util; + + typedef struct WorkerContext { /* tag added: a struct named only through a typedef should not declare member functions such as Loop() */ + CpuGovernor*super; + int id; + uint32_t util; + uint64_t tick; + + static void Loop(void* args); + } WorkerContext; + + typedef struct GovernorWorker { + Thread threads[CORE_NUMS]; + WorkerContext contexts[CORE_NUMS]; + bool running = false; /* read by onConfigUpdated() below, so it must not start indeterminate */ + CpuGovernor* super; + + void Start(); + void Stop(); + + void onConfigUpdated(SysClkOcGovernorConfig config) { /* starts or stops the workers only when the CPU bit of config disagrees with running */ + bool expected = (config >> SysClkOcGovernorConfig_CPU_Shift) & 1; + if (expected != running) + expected ? 
Start() : Stop(); + }; + } GovernorWorker; + GovernorWorker m_worker; + + Governor* m_manager; + + friend Governor; }; + + class GpuGovernor : public BaseGovernor { + public: + GpuGovernor() : BaseGovernor(SysClkModule_GPU) { + min_hz = 153'600'000; + boost_hz = 76'800'000; + + nvInitialize(); + Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu"); + if (R_FAILED(rc)) { + ASSERT_RESULT_OK(rc, "nvOpen"); + nvExit(); + } + }; + + ~GpuGovernor() { + nvClose(m_nvgpu_field); + nvExit(); + }; + + void SetMaxHz(uint32_t maxHz); + + void Apply(); + + protected: + // Get average value from a sliding window in O(1) + template + class SWindowAvg { + public: + SWindowAvg() {} + + void Add(T item) { + T pop = m_queue[m_next]; + m_queue[m_next] = item; + m_next = (m_next + 1) % WINDOW_SIZE; + m_sum -= pop; + m_sum += item; + } + + T Get() { return m_sum / WINDOW_SIZE; } + + protected: + size_t m_next = 0; + T m_sum = 0; + T m_queue[WINDOW_SIZE] = {}; + }; + + // Get max value from a sliding window in O(1) + template + class SWindowMax { + protected: + typedef struct { + T item; + T max; + } s_Entry; + + struct s_Stack { + s_Entry m_stack[WINDOW_SIZE] = {}; + size_t m_next = WINDOW_SIZE; + + bool empty() { return m_next == 0; }; + s_Entry top() { return m_stack[m_next-1]; }; + s_Entry pop() { return m_stack[--m_next]; }; + void push(s_Entry item) { + if (m_next == WINDOW_SIZE) + return; + m_stack[m_next++] = item; + }; + }; + + s_Stack enqStack; + s_Stack deqStack; + + void Push(s_Stack& stack, T item) { /* each entry stores the max of itself and everything below it in the SAME stack */ + s_Entry n = { + .item = item, + .max = stack.empty() ? 
item : std::max(item, stack.top().max) + }; + stack.push(n); + } + + T Pop() { /* removes the oldest sample; refills deqStack from enqStack when it runs dry */ + if (deqStack.empty()) { + while (!enqStack.empty()) + Push(deqStack, enqStack.pop().item); /* move the items themselves so deqStack rebuilds its own running maxima */ + } + return deqStack.pop().item; + } + + public: + SWindowMax() { enqStack.m_next = 0; } /* window starts as WINDOW_SIZE zero samples in deqStack; enqStack must start empty because push() rejects items on a full stack */ + + void Add(T item) { Pop(); Push(enqStack, item); } + + T Get() { + if (!enqStack.empty()) { + T enqMax = enqStack.top().max; + if (!deqStack.empty()) { + T deqMax = deqStack.top().max; + return std::max(deqMax, enqMax); + } + return enqMax; + } + if (!deqStack.empty()) + return deqStack.top().max; + return 0; + } + }; + + typedef struct MaxWindow { + SWindowMax window {}; + uint32_t util_acc = 0; + + // After 160 ms (if SAMPLE_RATE == 200, 32 samples) + // UTIL_MAX * D^32 ≈ 1 (UTIL_MAX decayed to 1) + // D = 6880 / 8192 + // Util_acc_max = Util_acc_inf = 6145 + static constexpr uint32_t DECAY_DIVIDENT = 6880; + static constexpr uint32_t DECAY_DIVISOR = 8192; + static constexpr uint32_t UTIL_ACC_MAX = 6145; + + uint32_t Get() { return ((util_acc * UTIL_MAX / UTIL_ACC_MAX) + window.Get()) / 2; }; /* mean of the rescaled decayed accumulator and the window maximum */ + void Update(uint32_t util) { window.Add(util); util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; }; + } MaxWindow; + MaxWindow m_util; + + uint32_t m_nvgpu_field; + }; + +} + +class Governor { +public: + Governor() { + m_cpu_gov = new GovernorImpl::CpuGovernor(this); + m_gpu_gov = new GovernorImpl::GpuGovernor(); + }; + + ~Governor() { + m_manager.Stop(); + delete m_cpu_gov; + delete m_gpu_gov; + }; + + SysClkOcGovernorConfig GetConfig() { return m_config; }; + bool IsHandledByGovernor(SysClkModule module = SysClkModule_EnumMax); + void SetConfig(SysClkOcGovernorConfig 
config); + + void SetPerfConf(uint32_t id); + uint32_t GetPerfConf() { return m_perf_conf_id; }; + + void SetMaxHz(uint32_t maxHz, SysClkModule module); + + void SetAutoCPUBoost(bool enabled) { m_cpu_gov->auto_boost = enabled; }; + void SetCPUBoostHz(uint32_t boostHz) { m_cpu_gov->boost_hz = boostHz; }; + +protected: + typedef struct GovernorManager { + 
bool running = false; + Thread thread; + + void Start(); + void Stop(); + void onConfigUpdated(SysClkOcGovernorConfig config) { /* runs the manager thread whenever config is anything other than AllDisabled */ + bool shouldRun = (config != SysClkOcGovernorConfig_AllDisabled); + shouldRun ? Start() : Stop(); + }; + static void ContextManager(void* args); + } GovernorManager; + GovernorManager m_manager; + + SysClkOcGovernorConfig m_config = SysClkOcGovernorConfig_AllDisabled; + + uint32_t m_perf_conf_id = 0; /* returned by GetPerfConf(), so it must not start indeterminate */ + SysClkApmConfiguration* m_apm_conf = nullptr; /* no configuration selected yet */ + + GovernorImpl::CpuGovernor* m_cpu_gov; + GovernorImpl::GpuGovernor* m_gpu_gov; };